@@ -25,10 +25,8 @@ const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
 const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often.
 
-export interface IFrequencyPackage {
-    dbBars: Float32Array;
-    dbMin: number;
-    dbMax: number;
+export interface IRecordingUpdate {
+    waveform: number[]; // floating points between 0 (low) and 1 (high).
 
     // TODO: @@ TravisR: Generalize this for a timing package?
 }
@@ -38,11 +36,11 @@ export class VoiceRecorder {
     private recorderContext: AudioContext;
     private recorderSource: MediaStreamAudioSourceNode;
     private recorderStream: MediaStream;
-    private recorderFreqNode: AnalyserNode;
+    private recorderFFT: AnalyserNode;
     private buffer = new Uint8Array(0);
     private mxc: string;
     private recording = false;
-    private observable: SimpleObservable<IFrequencyPackage>;
+    private observable: SimpleObservable<IRecordingUpdate>;
     private freqTimerId: number;
 
     public constructor(private client: MatrixClient) {
@@ -64,8 +62,16 @@ export class VoiceRecorder {
             sampleRate: SAMPLE_RATE, // once again, the browser will resample for us
         });
         this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
-        this.recorderFreqNode = this.recorderContext.createAnalyser();
-        this.recorderSource.connect(this.recorderFreqNode);
+        this.recorderFFT = this.recorderContext.createAnalyser();
+
+        // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
+        // of two. We use 64 points because we happen to know down the line we need less than
+        // that, but 32 would be too few. Large numbers are not helpful here and do not add
+        // precision: they introduce higher precision outputs of the FFT (frequency data), but
+        // they make the time domain less than helpful.
+        this.recorderFFT.fftSize = 64;
+
+        this.recorderSource.connect(this.recorderFFT);
         this.recorder = new Recorder({
             encoderPath, // magic from webpack
             encoderSampleRate: SAMPLE_RATE,
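For context on the fftSize comment above: in the Web Audio API, an AnalyserNode's fftSize controls both how many time-domain samples getByteTimeDomainData() fills in (exactly fftSize of them) and how many frequency bins the FFT exposes (fftSize / 2, via frequencyBinCount). A minimal standalone sketch of those relationships, separate from this diff:

// Standalone illustration, not part of this change: how fftSize shapes the data
// an AnalyserNode hands back.
const ctx = new AudioContext();
const analyser = ctx.createAnalyser();
analyser.fftSize = 64; // must be a power of two

// Time domain (waveform): one byte per sample, fftSize samples total.
const timeDomain = new Uint8Array(analyser.fftSize); // 64 entries
analyser.getByteTimeDomainData(timeDomain);

// Frequency domain: fftSize / 2 bins.
const frequencies = new Uint8Array(analyser.frequencyBinCount); // 32 entries
analyser.getByteFrequencyData(frequencies);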
@@ -91,7 +97,7 @@ export class VoiceRecorder {
         };
     }
 
-    public get frequencyData(): SimpleObservable<IFrequencyPackage> {
+    public get liveData(): SimpleObservable<IRecordingUpdate> {
         if (!this.recording) throw new Error("No observable when not recording");
         return this.observable;
     }
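To make the renamed getter concrete, a hypothetical consumer could look roughly like the sketch below. It assumes SimpleObservable exposes an onUpdate(callback) registration method, which this diff does not show, and that the recorder is already recording when liveData is read:

// Hypothetical consumer sketch; onUpdate() on SimpleObservable is assumed, not shown in this diff.
function watchRecording(recorder: VoiceRecorder) {
    recorder.liveData.onUpdate((update: IRecordingUpdate) => {
        // waveform entries are roughly 0 (quiet) to 1 (loud), so they map directly to bar heights.
        const heights = update.waveform.map(v => Math.round(v * 100));
        console.log("waveform %: " + heights.join(", "));
    });
}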
@@ -121,16 +127,35 @@ export class VoiceRecorder {
         if (this.observable) {
             this.observable.close();
         }
-        this.observable = new SimpleObservable<IFrequencyPackage>();
+        this.observable = new SimpleObservable<IRecordingUpdate>();
         await this.makeRecorder();
         this.freqTimerId = setInterval(() => {
             if (!this.recording) return;
-            const data = new Float32Array(this.recorderFreqNode.frequencyBinCount);
-            this.recorderFreqNode.getFloatFrequencyData(data);
+
+            // The time domain is the input to the FFT, which means we use an array of the same
+            // size. The time domain is also known as the audio waveform. We're ignoring the
+            // output of the FFT here (frequency data) because we're not interested in it.
+            //
+            // We use bytes out of the analyser because floats have weird precision problems
+            // and are slightly more difficult to work with. The bytes are easy to work with,
+            // which is why we pick them (the floats are more precise, but we care less about that).
+            const data = new Uint8Array(this.recorderFFT.fftSize);
+            this.recorderFFT.getByteTimeDomainData(data);
+
+            // Because we're dealing with a uint array we need to do math a bit differently.
+            // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't
+            // what we're after. Instead, we have to use a bit of manual looping to correctly end
+            // up with the right values.
+            const translatedData: number[] = [];
+            for (let i = 0; i < data.length; i++) {
+                // All we're doing here is inverting the amplitude and putting the metric somewhere
+                // between zero and one. Without the inversion, lower values are "louder", which is
+                // not super helpful.
+                translatedData.push(1 - (data[i] / 128.0));
+            }
+
             this.observable.update({
-                dbBars: data,
-                dbMin: this.recorderFreqNode.minDecibels,
-                dbMax: this.recorderFreqNode.maxDecibels,
+                waveform: translatedData,
             });
         }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment
         await this.recorder.start();
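As a worked illustration of the translation loop above (separate from the diff itself): getByteTimeDomainData() centers silence at 128, so 1 - (value / 128) maps 128 to 0 and 0 to 1, with samples above 128 coming out slightly negative:

// Standalone sketch of the same normalisation, with a few worked values.
function translate(samples: Uint8Array): number[] {
    const out: number[] = [];
    for (let i = 0; i < samples.length; i++) {
        out.push(1 - (samples[i] / 128.0));
    }
    return out;
}

// 128 is the zero-crossing (silence) for byte time-domain data.
console.log(translate(new Uint8Array([128, 64, 0, 192])));
// -> [0, 0.5, 1, -0.5]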