@@ -25,10 +25,8 @@ const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often.
 
-export interface IFrequencyPackage {
-    dbBars: Float32Array;
-    dbMin: number;
-    dbMax: number;
+export interface IRecordingUpdate {
+    waveform: number[]; // floating points between 0 (low) and 1 (high).
 
     // TODO: @@ TravisR: Generalize this for a timing package?
 }
@@ -38,11 +36,11 @@ export class VoiceRecorder {
     private recorderContext: AudioContext;
     private recorderSource: MediaStreamAudioSourceNode;
     private recorderStream: MediaStream;
-    private recorderFreqNode: AnalyserNode;
+    private recorderFFT: AnalyserNode;
     private buffer = new Uint8Array(0);
     private mxc: string;
     private recording = false;
-    private observable: SimpleObservable<IFrequencyPackage>;
+    private observable: SimpleObservable<IRecordingUpdate>;
     private freqTimerId: number;
 
     public constructor(private client: MatrixClient) {
@@ -64,8 +62,16 @@ export class VoiceRecorder {
             sampleRate: SAMPLE_RATE, // once again, the browser will resample for us
         });
         this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
-        this.recorderFreqNode = this.recorderContext.createAnalyser();
-        this.recorderSource.connect(this.recorderFreqNode);
+        this.recorderFFT = this.recorderContext.createAnalyser();
+
+        // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
+        // of two. We use 64 points because we happen to know down the line we need less than
+        // that, but 32 would be too few. Large numbers are not helpful here and do not add
+        // precision: they introduce higher precision outputs of the FFT (frequency data), but
+        // they make the time domain less than helpful.
+        this.recorderFFT.fftSize = 64;
+
+        this.recorderSource.connect(this.recorderFFT);
         this.recorder = new Recorder({
             encoderPath, // magic from webpack
             encoderSampleRate: SAMPLE_RATE,
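For context on the fftSize comment above: in the Web Audio API, an AnalyserNode's fftSize controls both how many time-domain samples getByteTimeDomainData() fills in (exactly fftSize of them) and how many frequency bins the FFT exposes (fftSize / 2, via frequencyBinCount). A minimal standalone sketch of those relationships, separate from this diff:

// Standalone illustration, not part of this change: how fftSize shapes the data
// an AnalyserNode hands back.
const ctx = new AudioContext();
const analyser = ctx.createAnalyser();
analyser.fftSize = 64; // must be a power of two

// Time domain (waveform): one byte per sample, fftSize samples total.
const timeDomain = new Uint8Array(analyser.fftSize); // 64 entries
analyser.getByteTimeDomainData(timeDomain);

// Frequency domain: fftSize / 2 bins.
const frequencies = new Uint8Array(analyser.frequencyBinCount); // 32 entries
analyser.getByteFrequencyData(frequencies);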
@@ -91,7 +97,7 @@ export class VoiceRecorder {
         };
     }
 
-    public get frequencyData(): SimpleObservable<IFrequencyPackage> {
+    public get liveData(): SimpleObservable<IRecordingUpdate> {
         if (!this.recording) throw new Error("No observable when not recording");
         return this.observable;
     }
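To make the renamed getter concrete, a hypothetical consumer could look roughly like the sketch below. It assumes SimpleObservable exposes an onUpdate(callback) registration method, which this diff does not show, and that the recorder is already recording when liveData is read:

// Hypothetical consumer sketch; onUpdate() on SimpleObservable is assumed, not shown in this diff.
function watchRecording(recorder: VoiceRecorder) {
    recorder.liveData.onUpdate((update: IRecordingUpdate) => {
        // waveform entries are roughly 0 (quiet) to 1 (loud), so they map directly to bar heights.
        const heights = update.waveform.map(v => Math.round(v * 100));
        console.log("waveform %: " + heights.join(", "));
    });
}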
@@ -121,16 +127,35 @@ export class VoiceRecorder {
         if (this.observable) {
             this.observable.close();
         }
-        this.observable = new SimpleObservable<IFrequencyPackage>();
+        this.observable = new SimpleObservable<IRecordingUpdate>();
         await this.makeRecorder();
         this.freqTimerId = setInterval(() => {
             if (!this.recording) return;
-            const data = new Float32Array(this.recorderFreqNode.frequencyBinCount);
-            this.recorderFreqNode.getFloatFrequencyData(data);
+
+            // The time domain is the input to the FFT, which means we use an array of the same
+            // size. The time domain is also known as the audio waveform. We're ignoring the
+            // output of the FFT here (frequency data) because we're not interested in it.
+            //
+            // We use bytes out of the analyser because floats have weird precision problems
+            // and are slightly more difficult to work with. The bytes are easy to work with,
+            // which is why we pick them (the floats are more precise, but we care less about that).
+            const data = new Uint8Array(this.recorderFFT.fftSize);
+            this.recorderFFT.getByteTimeDomainData(data);
+
+            // Because we're dealing with a uint array we need to do math a bit differently.
+            // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't
+            // what we're after. Instead, we have to use a bit of manual looping to correctly end
+            // up with the right values.
+            const translatedData: number[] = [];
+            for (let i = 0; i < data.length; i++) {
+                // All we're doing here is inverting the amplitude and putting the metric somewhere
+                // between zero and one. Without the inversion, lower values are "louder", which is
+                // not super helpful.
+                translatedData.push(1 - (data[i] / 128.0));
+            }
+
             this.observable.update({
-                dbBars: data,
-                dbMin: this.recorderFreqNode.minDecibels,
-                dbMax: this.recorderFreqNode.maxDecibels,
+                waveform: translatedData,
             });
         }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment
         await this.recorder.start();
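As a worked illustration of the translation loop above (separate from the diff itself): getByteTimeDomainData() centers silence at 128, so 1 - (value / 128) maps 128 to 0 and 0 to 1, with samples above 128 coming out slightly negative:

// Standalone sketch of the same normalisation, with a few worked values.
function translate(samples: Uint8Array): number[] {
    const out: number[] = [];
    for (let i = 0; i < samples.length; i++) {
        out.push(1 - (samples[i] / 128.0));
    }
    return out;
}

// 128 is the zero-crossing (silence) for byte time-domain data.
console.log(translate(new Uint8Array([128, 64, 0, 192])));
// -> [0, 0.5, 1, -0.5]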