diff --git a/backend/src/features.py b/backend/src/features.py index ffa3516c..913dcfef 100644 --- a/backend/src/features.py +++ b/backend/src/features.py @@ -114,7 +114,6 @@ def insert(self, y): chroma[:] = 1 length = 12**(0.5) chroma = chroma / length - return chroma diff --git a/frontend/companion-app/App.tsx b/frontend/companion-app/App.tsx index 195c594d..ff695345 100644 --- a/frontend/companion-app/App.tsx +++ b/frontend/companion-app/App.tsx @@ -15,6 +15,8 @@ import reducer_function from "./Dispatch"; import ScoreDisplay from "./components/ScoreDisplay"; import { SynthesizeButton } from "./components/SynthesizeButton"; import Icon from 'react-native-vector-icons/Feather'; +import { ChromaMaker } from "./utils/features"; +import FontAwesome from 'react-native-vector-icons/FontAwesome'; // Define the main application component export default function App() { @@ -61,6 +63,56 @@ export default function App() { setSessionToken(newToken) }, []); + // Initialize the chroma state as an array of 12 zeros (used to capture chroma vector at each chunk of audio). 
+ const [chroma, setChroma] = useState(new Array(12).fill(0)); + const [started, setStarted] = useState(false); // Tracks whether the user has selected the live-microphone option + + useEffect(() => { + let audioCtx: AudioContext; // Declare a reference to the AudioContext, which manages all audio processing + let micStream: MediaStream; // Declare a reference to the MediaStream from the user's microphone + + const initAudio = async () => { + try { + micStream = await navigator.mediaDevices.getUserMedia({ audio: true }); // Request access to user's microphone + audioCtx = new AudioContext(); // Create a new AudioContext for audio processing + await audioCtx.audioWorklet.addModule('../utils/mic-processor.js'); // Load the custom AudioWorkletProcessor + const source = audioCtx.createMediaStreamSource(micStream); // Create a source node from the microphone stream + const workletNode = new AudioWorkletNode(audioCtx, 'mic-processor'); // Create an AudioWorkletNode linked to our custom 'mic-processor' + source.connect(workletNode); // Connect the mic source to the worklet + workletNode.connect(audioCtx.destination); // Connect the worklet to the audio output + + // Initialize the ChromaMaker for extracting chroma features + const n_fft = 4096; + const chromaMaker = new ChromaMaker(audioCtx.sampleRate, n_fft); + + // Handle incoming audio chunks from the worklet + workletNode.port.onmessage = (event) => { + const audioChunk = event.data as Float32Array; + try { + // Extract chroma features and update state + const chromaResult = chromaMaker.insert(audioChunk); + setChroma(chromaResult); + } catch (e) { + console.error('Chroma extraction error:', e); + } + }; + } catch (err) { + console.error('Failed to initialize audio:', err); + } + }; + // If "started" state is true, initialize audio processing + if (started) { + initAudio(); + } + + // Cleanup: when the component unmounts or `started` becomes false, + // stop the microphone stream and close the audio context to free up resources 
+ return () => { + if (micStream) micStream.getTracks().forEach((track) => track.stop()); + if (audioCtx) audioCtx.close(); + }; + }, [started]); + //////////////////////////////////////////////////////////////////////////////// // The lines below were modified, copied and pasted out of the audio recorder object // (which never really needed a UI). @@ -70,112 +122,112 @@ export default function App() { // Audio-related states and refs // State for whether we have microphone permissions - is set to true on first trip to playmode - const [permission, setPermission] = useState(false); - // Assorted audio-related objects in need of reference - // Tend to be re-created upon starting a recording - const mediaRecorder = useRef( - new MediaRecorder(new MediaStream()), - ); - const [stream, setStream] = useState(new MediaStream()); - const [audioChunks, setAudioChunks] = useState([]); + // const [permission, setPermission] = useState(false); + // // Assorted audio-related objects in need of reference + // // Tend to be re-created upon starting a recording + // const mediaRecorder = useRef( + // new MediaRecorder(new MediaStream()), + // ); + // const [stream, setStream] = useState(new MediaStream()); + // const [audioChunks, setAudioChunks] = useState([]); - const audioContextRef = useRef(null); - const analyserRef = useRef(null); - const dataArrayRef = useRef(null); - const startTimeRef = useRef(null); + // const audioContextRef = useRef(null); + // const analyserRef = useRef(null); + // const dataArrayRef = useRef(null); + // const startTimeRef = useRef(null); - // Audio-related functions - ///////////////////////////////////////////////////////// - // This function sends a synchronization request and updates the state with the result - const UPDATE_INTERVAL = 100; + // // Audio-related functions + // ///////////////////////////////////////////////////////// + // // This function sends a synchronization request and updates the state with the result + // const 
UPDATE_INTERVAL = 100; - const getAPIData = async () => { - analyserRef.current?.getByteTimeDomainData(dataArrayRef.current); - const { - playback_rate: newPlayRate, - estimated_position: estimated_position, - } = await synchronize(state.sessionToken, Array.from(dataArrayRef.current), state.timestamp); + // const getAPIData = async () => { + // analyserRef.current?.getByteTimeDomainData(dataArrayRef.current); + // const { + // playback_rate: newPlayRate, + // estimated_position: estimated_position, + // } = await synchronize(state.sessionToken, Array.from(dataArrayRef.current), state.timestamp); - dispatch({ - type: "increment", - time: estimated_position, - rate: newPlayRate, - }); - } + // dispatch({ + // type: "increment", + // time: estimated_position, + // rate: newPlayRate, + // }); + // } - // This function established new recording instances when re-entering play mode - const startRecording = async () => { - // It's possible some of these can be removed; not sure which relate to the - // making of the recorded object we don't need and which relate to the - // buffer we send to the backend. 
- startTimeRef.current = Date.now(); - //create new Media recorder instance using the stream - const media = new MediaRecorder(stream, { mimeType: "audio/webm" }); - //set the MediaRecorder instance to the mediaRecorder ref - mediaRecorder.current = media; - //invokes the start method to start the recording process - mediaRecorder.current.start(); - let localAudioChunks: Blob[] = []; - mediaRecorder.current.ondataavailable = (event) => { - if (typeof event.data === "undefined") return; - if (event.data.size === 0) return; - localAudioChunks.push(event.data); - }; - setAudioChunks(localAudioChunks); + // // This function established new recording instances when re-entering play mode + // const startRecording = async () => { + // // It's possible some of these can be removed; not sure which relate to the + // // making of the recorded object we don't need and which relate to the + // // buffer we send to the backend. + // startTimeRef.current = Date.now(); + // //create new Media recorder instance using the stream + // const media = new MediaRecorder(stream, { mimeType: "audio/webm" }); + // //set the MediaRecorder instance to the mediaRecorder ref + // mediaRecorder.current = media; + // //invokes the start method to start the recording process + // mediaRecorder.current.start(); + // let localAudioChunks: Blob[] = []; + // mediaRecorder.current.ondataavailable = (event) => { + // if (typeof event.data === "undefined") return; + // if (event.data.size === 0) return; + // localAudioChunks.push(event.data); + // }; + // setAudioChunks(localAudioChunks); - audioContextRef.current = new window.AudioContext(); - const source = audioContextRef.current.createMediaStreamSource(stream); - analyserRef.current = audioContextRef.current.createAnalyser(); - analyserRef.current.fftSize = 2048; - source.connect(analyserRef.current); + // audioContextRef.current = new window.AudioContext(); + // const source = audioContextRef.current.createMediaStreamSource(stream); + // 
analyserRef.current = audioContextRef.current.createAnalyser(); + // analyserRef.current.fftSize = 2048; + // source.connect(analyserRef.current); - const bufferLength = analyserRef.current.frequencyBinCount; - dataArrayRef.current = new Uint8Array(bufferLength); + // const bufferLength = analyserRef.current.frequencyBinCount; + // dataArrayRef.current = new Uint8Array(bufferLength); - getAPIData(); // run the first call - }; + // getAPIData(); // run the first call + // }; - //stops the recording instance - const stopRecording = () => { - mediaRecorder.current.stop(); - audioContextRef.current?.close(); - }; + // //stops the recording instance + // const stopRecording = () => { + // mediaRecorder.current.stop(); + // audioContextRef.current?.close(); + // }; - // Function to get permission to use browser microphone - const getMicrophonePermission = async () => { - if ("MediaRecorder" in window) { - try { - const streamData = await navigator.mediaDevices.getUserMedia({ - audio: true, - video: false, - }); - setPermission(true); - setStream(streamData); - } catch (err) { - alert((err as Error).message); - } - } else { - alert("The MediaRecorder API is not supported in your browser."); - } - }; + // // Function to get permission to use browser microphone + // const getMicrophonePermission = async () => { + // if ("MediaRecorder" in window) { + // try { + // const streamData = await navigator.mediaDevices.getUserMedia({ + // audio: true, + // video: false, + // }); + // setPermission(true); + // setStream(streamData); + // } catch (err) { + // alert((err as Error).message); + // } + // } else { + // alert("The MediaRecorder API is not supported in your browser."); + // } + // }; - ///////////////////////////////////////////// - // Audio-related effects - // Get microphone permission on first time entering play state - useEffect(() => { - if (!permission) getMicrophonePermission(); - }, [state.inPlayMode]); + // ///////////////////////////////////////////// + // // 
Audio-related effects + // // Get microphone permission on first time entering play state + // useEffect(() => { + // if (!permission) getMicrophonePermission(); + // }, [state.inPlayMode]); - // Start and stop recording when player is or isn't playing - useEffect(() => { - if (state.playing) startRecording(); - else stopRecording(); - }, [state.playing]); + // // Start and stop recording when player is or isn't playing + // useEffect(() => { + // if (state.playing) startRecording(); + // else stopRecording(); + // }, [state.playing]); - // Keep synchronizing while playing - useEffect(() => { - if (state.playing) setTimeout(getAPIData, UPDATE_INTERVAL); - }, [state.timestamp]) + // // Keep synchronizing while playing + // useEffect(() => { + // if (state.playing) setTimeout(getAPIData, UPDATE_INTERVAL); + // }, [state.timestamp]) // State to conditionally render the style type of the components (can only be "light" or "dark") const [theme, setTheme] = useState<"light" | "dark">("light"); @@ -255,7 +307,6 @@ export default function App() { // Boolean used for dynmaic display (row or column) const isSmallScreen = width < 960; - //////////////////////////////////////////////////////////////////////////////// // Render the component's UI //////////////////////////////////////////////////////////////////////////////// @@ -264,10 +315,20 @@ export default function App() { {/* Header with image */} - - - + + setStarted(!started)}> + + + + + + + {/* Provides safe area insets for mobile devices */} @@ -326,41 +387,17 @@ export default function App() { - - - {/* Footer display for status */} {/* Automatically adjusts the status bar style */} - ); } -// Theme-based styles (not needed since we have animated API to do light and dark transitions smoother) -// const themeStyles = { -// light: { -// container: { backgroundColor: '#F5F5F5' }, -// menu_bar: { backgroundColor: '#2C3E50' }, -// sidebar: { backgroundColor: '#ECF0F1' }, -// mainContent: { backgroundColor: '#FFFFFF' }, 
-// text: { color: "#2C3E50", fontWeight: "bold"} as TextStyle, // use for typscirpt syntax -// button: { backgroundColor: "#2C3E50"} -// }, -// dark: { -// container: { backgroundColor: '#0F0F0F' }, -// menu_bar: { backgroundColor: '#1A252F' }, -// sidebar: { backgroundColor: '#4A627A' }, -// mainContent: { backgroundColor: '#6B87A3' }, -// text: { color: '#ffffff', fontWeight: "bold"} as TextStyle, // use for typscirpt syntax -// button: { backgroundColor: "#ffffff"} -// }, -// }; - // Define styles for the components using StyleSheet const styles = StyleSheet.create({ diff --git a/frontend/companion-app/Dispatch.ts b/frontend/companion-app/Dispatch.ts index 80c9385c..a239c26d 100644 --- a/frontend/companion-app/Dispatch.ts +++ b/frontend/companion-app/Dispatch.ts @@ -100,15 +100,17 @@ const reducer_function = (state: any, action: any) => { }, }; - // Adds uploaded score's name to list - case "new_score_from_upload": - return { - ...state, - ...{ - scores: [...state.scores, action.score], - score: action.score.filename, - }, - }; + case "new_score_from_upload": + return { + ...state, // Keep the existing state + scores: [...state.scores, action.score.filename], // Add the new score filename to the scores array + score: action.score.filename, // Set the current score to the newly uploaded filename + scoreContents: { + ...state.scoreContents, // Keep existing score content + [action.score.filename]: action.score.content, // Add the new score content to the scoreContents object using the filename as the key + }, + }; + default: // If no valid type, return state, otherwise the function returns null and the state is gone. 
return state; } diff --git a/frontend/companion-app/components/ScoreDisplay.tsx b/frontend/companion-app/components/ScoreDisplay.tsx index 01f1022c..6bdac5c7 100644 --- a/frontend/companion-app/components/ScoreDisplay.tsx +++ b/frontend/companion-app/components/ScoreDisplay.tsx @@ -87,8 +87,8 @@ export default function ScoreDisplay({ ); osdRef.current = osm; - // Retrieve the local XML content based on the selected score. - const xmlContent = scoresData[selectedScore]; + // If score name is a key within ScoreContents use the xml content value within that key, otherwise access xml content through the static key value mapping defined within scores.ts + const xmlContent = (state.scoreContents && state.scoreContents[selectedScore]) || scoresData[selectedScore]; // Error handling if no xml content for selected score is found if (!xmlContent) { console.error("Score content not found for:", selectedScore); diff --git a/frontend/companion-app/components/ScoreSelect.tsx b/frontend/companion-app/components/ScoreSelect.tsx index a0ac3f4e..e598990c 100644 --- a/frontend/companion-app/components/ScoreSelect.tsx +++ b/frontend/companion-app/components/ScoreSelect.tsx @@ -34,28 +34,34 @@ export function Score_Select({ // fetchScores(); // }, [dispatch]); - // Array of score names used to render score display options + // Array of score names used to render score display options const musicxmlFiles: string[] = [ 'air_on_the_g_string.musicxml', 'twelve_duets.musicxml', ]; - - + useEffect(()=> { - console.log("Local scores: ", musicxmlFiles); dispatch({ type: "new_scores_from_backend", scores: musicxmlFiles }); // pass in defined array of musicxml files }, [dispatch]) const handleFileUpload = (file: File) => { const reader = new FileReader(); reader.onload = (e) => { - const xmlContent = e.target?.result as string; - const newScore = { - filename: file.name, - piece: file.name.replace(".musicxml", ""), - content: xmlContent, - }; - dispatch({ type: "new_score_from_upload", score: 
newScore }); + const xmlContent = e.target?.result as string; + const fileName = file.name; // extract the file name + + if (!state.scores.includes(fileName)) { // only add new score if the new uploaded score's name isn't already stored within scores + const newScore = { + filename: file.name, + piece: file.name.replace(".musicxml", ""), + content: xmlContent, + }; + dispatch({ type: "new_score_from_upload", score: newScore }); + } + }; + + reader.onerror = (e) => { + console.error("Error reading file:", e); }; reader.readAsText(file); }; diff --git a/frontend/companion-app/package-lock.json b/frontend/companion-app/package-lock.json index 805ad8d7..05a91dcb 100644 --- a/frontend/companion-app/package-lock.json +++ b/frontend/companion-app/package-lock.json @@ -14,6 +14,9 @@ "expo-asset": "~10.0.10", "expo-av": "~14.0.7", "expo-status-bar": "~1.12.1", + "fft-js": "^0.0.12", + "fft.js": "^4.0.4", + "node-wav": "^0.0.2", "opensheetmusicdisplay": "^1.8.9", "react": "18.2.0", "react-dom": "18.2.0", @@ -22,7 +25,8 @@ "react-native-picker-select": "^9.3.1", "react-native-vector-icons": "^10.2.0", "react-native-web": "~0.19.10", - "react-native-webview": "^13.12.3" + "react-native-webview": "^13.12.3", + "wave-resampler": "^1.0.0" }, "devDependencies": { "@babel/core": "^7.20.0", @@ -6854,8 +6858,7 @@ "node_modules/bit-twiddle": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/bit-twiddle/-/bit-twiddle-1.0.2.tgz", - "integrity": "sha512-B9UhK0DKFZhoTFcfvAzhqsjStvGJp9vYWf3+6SNTtdSQnvIgfkHbgHrg/e4+TH71N2GDu8tpmCVoyfrL1d7ntA==", - "optional": true + "integrity": "sha512-B9UhK0DKFZhoTFcfvAzhqsjStvGJp9vYWf3+6SNTtdSQnvIgfkHbgHrg/e4+TH71N2GDu8tpmCVoyfrL1d7ntA==" }, "node_modules/bl": { "version": "4.1.0", @@ -9173,6 +9176,34 @@ "resolved": "https://registry.npmjs.org/fetch-retry/-/fetch-retry-4.1.1.tgz", "integrity": "sha512-e6eB7zN6UBSwGVwrbWVH+gdLnkW9WwHhmq2YDK1Sh30pzx1onRVGBvogTlUeWxwTa+L86NYdo4hFkh7O8ZjSnA==" }, + "node_modules/fft-js": { + "version": "0.0.12", 
+ "resolved": "https://registry.npmjs.org/fft-js/-/fft-js-0.0.12.tgz", + "integrity": "sha512-nLOa0/SYYnN2NPcLrI81UNSPxyg3q0sGiltfe9G1okg0nxs5CqAwtmaqPQdGcOryeGURaCoQx8Y4AUkhGTh7IQ==", + "dependencies": { + "bit-twiddle": "~1.0.2", + "commander": "~2.7.1" + }, + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/fft-js/node_modules/commander": { + "version": "2.7.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.7.1.tgz", + "integrity": "sha512-5qK/Wsc2fnRCiizV1JlHavWrSGAXQI7AusK423F8zJLwIGq8lmtO5GmO8PVMrtDUJMwTXOFBzSN6OCRD8CEMWw==", + "dependencies": { + "graceful-readlink": ">= 1.0.0" + }, + "engines": { + "node": ">= 0.6.x" + } + }, + "node_modules/fft.js": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/fft.js/-/fft.js-4.0.4.tgz", + "integrity": "sha512-f9c00hphOgeQTlDyavwTtu6RiK8AIFjD6+jvXkNkpeQ7rirK3uFWVpalkoS4LAwbdX7mfZ8aoBfFVQX1Re/8aw==" + }, "node_modules/file-entry-cache": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", @@ -9715,6 +9746,11 @@ "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==" }, + "node_modules/graceful-readlink": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/graceful-readlink/-/graceful-readlink-1.0.1.tgz", + "integrity": "sha512-8tLu60LgxF6XpdbK8OW3FA+IfTNBn1ZHGHKF4KQbEeSkajYw5PlYJcKluntgegDPTg8UkHjpet1T82vk6TQ68w==" + }, "node_modules/graphemer": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", @@ -12942,6 +12978,14 @@ "url": "https://github.com/sponsors/antelle" } }, + "node_modules/node-wav": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/node-wav/-/node-wav-0.0.2.tgz", + "integrity": "sha512-M6Rm/bbG6De/gKGxOpeOobx/dnGuP0dz40adqx38boqHhlWssBJZgLCPBNtb9NkrmnKYiV04xELq+R6PFOnoLA==", + "engines": { + "node": 
">=4.4.0" + } + }, "node_modules/nopt": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/nopt/-/nopt-6.0.0.tgz", @@ -16365,6 +16409,14 @@ "makeerror": "1.0.12" } }, + "node_modules/wave-resampler": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/wave-resampler/-/wave-resampler-1.0.0.tgz", + "integrity": "sha512-bE3rbpZXuKAV52Cd8/BeJvy82ZqEHK8pPWHrZ9JioaVVTBlmWbDC+u4p9blhFcf0Skepb4hlOAHc25XfqLC48g==", + "engines": { + "node": ">=8" + } + }, "node_modules/wcwidth": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/wcwidth/-/wcwidth-1.0.1.tgz", diff --git a/frontend/companion-app/package.json b/frontend/companion-app/package.json index a7d713af..a65a57d7 100644 --- a/frontend/companion-app/package.json +++ b/frontend/companion-app/package.json @@ -17,6 +17,9 @@ "expo-asset": "~10.0.10", "expo-av": "~14.0.7", "expo-status-bar": "~1.12.1", + "fft-js": "^0.0.12", + "fft.js": "^4.0.4", + "node-wav": "^0.0.2", "opensheetmusicdisplay": "^1.8.9", "react": "18.2.0", "react-dom": "18.2.0", @@ -25,7 +28,8 @@ "react-native-picker-select": "^9.3.1", "react-native-vector-icons": "^10.2.0", "react-native-web": "~0.19.10", - "react-native-webview": "^13.12.3" + "react-native-webview": "^13.12.3", + "wave-resampler": "^1.0.0" }, "devDependencies": { "@babel/core": "^7.20.0", diff --git a/frontend/companion-app/utils/features.tsx b/frontend/companion-app/utils/features.tsx new file mode 100644 index 00000000..eda4d11c --- /dev/null +++ b/frontend/companion-app/utils/features.tsx @@ -0,0 +1,332 @@ +/* +Copyright (c) 2024 Matthew Caren + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to 
the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +// TypeScript conversion of features.py for chroma extraction +// ========================================================= +// This module provides functionality to compute CENS chroma features from audio, +// equivalent to the Python implementation in features.py. +// It includes pitch frequency calculation, spectrogram-to-pitch conversion matrix, +// a ChromaMaker class for streaming chroma calculation, and functions to process +// whole audio or files into np-CENS chromagrams. + +// Required dependencies (to install via npm): +// - node-wav (to decode WAV files to PCM data) +// - wave-resampler (to resample audio if needed) +// - fft-js (for FFT computation on audio frames) + +// Import necessary modules +const fs = require('fs'); +const wav = require('node-wav'); +const waveResampler = require('wave-resampler'); +const { fft } = require('fft-js'); + +/** + * Equivalent to Python function pitch_freqs. + * Returns the center frequency for each MIDI pitch in the range [start_pitch, end_pitch). + * @param start_pitch - starting MIDI pitch (inclusive) + * @param end_pitch - one more than the last MIDI pitch value (exclusive) + * @returns Array of length (end_pitch - start_pitch) with frequencies in Hz. 
+ */ +function pitch_freqs(start_pitch: number = 0, end_pitch: number = 128): number[] { + const kTRT = Math.pow(2, 1/12.0); // 2^(1/12) + const freqs: number[] = []; + for (let p = start_pitch; p < end_pitch; p += 1) { + // Calculate frequency for MIDI pitch p (A4=69 -> 440 Hz) + const freq = 440 * Math.pow(kTRT, p - 69); + freqs.push(freq); + } + return freqs; +} + +/** + * Equivalent to Python function spec_to_pitch_mtx. + * Create a conversion matrix from an FFT spectrum vector to a MIDI pitch vector (log-frequency spectrogram). + * + * @param fs - sample rate of the audio + * @param fft_len - the length of the FFT + * @param tuning - optional pitch adjustment in semitones (MIDI) for alternate tunings (default 0) + * @returns A matrix of shape (128, num_bins) where num_bins = fft_len//2 + 1 (number of frequency bins in rfft output). + * Each row corresponds to a MIDI pitch (0-127) and each column to an FFT bin, representing the contribution of that bin's frequency to the given pitch. 
+ */ +function spec_to_pitch_mtx(fs: number, fft_len: number, tuning: number = 0.0): number[][] { + const num_bins = Math.floor(fft_len / 2) + 1; + // Initialize output matrix 128 x num_bins with zeros + const out: number[][] = Array.from({ length: 128 }, () => new Array(num_bins).fill(0)); + + // Frequencies for each FFT bin (from 0 to Nyquist) + const bin_f: number[] = []; + for (let i = 0; i < num_bins; i++) { + bin_f.push(i * fs / fft_len); + } + + // Frequency center for each MIDI pitch 0-127 (with tuning offset) and edges for each pitch band + const pitch_center = pitch_freqs(0 + tuning, 128 + tuning); + const pitch_edges = pitch_freqs(-0.5 + tuning, 128.5 + tuning); + + // Precompute a Hann window of length 128 (for distributing bin contributions across pitch frequencies) + const windowLength = 128; + const hann: number[] = new Array(windowLength); + for (let i = 0; i < windowLength; i++) { + // Hann (Hanning) window formula + hann[i] = 0.5 - 0.5 * Math.cos((2 * Math.PI * i) / (windowLength - 1)); + } + + // Fill the conversion matrix + for (let p = 0; p < 128; p++) { + const f1 = pitch_edges[p]; + const f3 = pitch_edges[p + 1]; + for (let j = 0; j < num_bins; j++) { + const x = bin_f[j]; + let value: number; + if (x <= f1 || x >= f3) { + // Outside the pitch band - assign 0 (Hann window is zero at edges) + value = 0; + } else { + // Linearly interpolate the Hann window value at frequency x between f1 and f3 + const fraction = (x - f1) / (f3 - f1); + const idx = fraction * (windowLength - 1); + const i0 = Math.floor(idx); + const frac = idx - i0; + // Ensure index is within [0, windowLength-2] for interpolation + if (i0 >= windowLength - 1) { + // If x is extremely close to f3 (fraction ~1), just use last value + value = hann[windowLength - 1]; + } else { + value = hann[i0] + frac * (hann[i0 + 1] - hann[i0]); + } + } + out[p][j] = value; + } + } + return out; +} + +/** + * Class equivalent to Python class ChromaMaker. 
+ * Streaming implementation to convert audio frames (of length n_fft) into 12-dimensional CENS chroma vectors. + * Initialize with sample rate `sr` and FFT length `n_fft`. Then call `insert(y)` where y is an audio buffer of length n_fft. + */ +class ChromaMaker { + sr: number; + n_fft: number; + window: number[]; + c_fc: number[][]; // conversion matrix from FFT bins to chroma (12) bins + + constructor(sr: number, n_fft: number) { + // Equivalent to Python ChromaMaker.__init__ + // sr: sample rate, n_fft: FFT length (window size) + this.sr = sr; + this.n_fft = n_fft; + // 1) Create Hann (Hanning) window for FFT + this.window = new Array(n_fft); + for (let i = 0; i < n_fft; i++) { + this.window[i] = 0.5 - 0.5 * Math.cos((2 * Math.PI * i) / (n_fft - 1)); + } + // 2) Compute frequency-to-pitch conversion matrix (c_fp) for this sr and n_fft + const tuning = 0.0; + const c_fp = spec_to_pitch_mtx(this.sr, this.n_fft, tuning); // shape 128 x (n_fft/2+1) + // 3) Compute pitch-to-chroma (class) conversion matrix (c_pc) + // c_pc is a 12x128 matrix mapping 128 MIDI pitches to 12 pitch classes. + const c_pc: number[][] = Array.from({ length: 12 }, () => new Array(128).fill(0)); + for (let p = 0; p < 128; p++) { + const pitch_class = p % 12; + c_pc[pitch_class][p] = 1; + } + // 4) Compute full conversion from FFT bins to chroma: c_fc = c_pc * c_fp (matrix multiply) + const num_bins = Math.floor(n_fft / 2) + 1; + this.c_fc = Array.from({ length: 12 }, () => new Array(num_bins).fill(0)); + for (let chroma = 0; chroma < 12; chroma++) { + for (let j = 0; j < num_bins; j++) { + let sum = 0; + // sum over all pitches that map to this chroma class + for (let pitch = chroma; pitch < 128; pitch += 12) { + sum += c_fp[pitch][j]; + } + this.c_fc[chroma][j] = sum; + } + } + } + + /** + * Insert a new audio frame and compute its CENS chroma vector. + * @param y - audio frame of length n_fft (samples) + * @returns An array of length 12 representing the CENS chroma features for this frame. 
+ */ + insert(y: Float32Array | number[]): number[] { + // Equivalent to Python ChromaMaker.insert + if (y.length !== this.n_fft) { + throw new Error(`Input frame length ${y.length} does not match expected length ${this.n_fft}.`); + } + // 1) Apply Hann window to the audio frame + const sig = new Array(this.n_fft); + for (let i = 0; i < this.n_fft; i++) { + sig[i] = (y as any)[i] * this.window[i]; + } + // 2) Compute magnitude spectrum using FFT (real FFT since input is real) + // Use fft-js to compute FFT. It returns an array of [real, imag] pairs. + const phasors = fft(sig); + const num_bins = Math.floor(this.n_fft / 2) + 1; + // Take the magnitude (absolute value) of FFT output for bins 0..num_bins-1 + const X: number[] = new Array(num_bins); + for (let k = 0; k < num_bins; k++) { + const re = phasors[k][0]; + const im = phasors[k][1]; + X[k] = Math.sqrt(re * re + im * im); + } + // Convert to chroma by projecting the power spectrum onto pitch classes: + // We use X**2 (power) for projection (as in Python code X**2). 
+ const chromaVec: number[] = new Array(12).fill(0); + for (let i = 0; i < 12; i++) { + let sum = 0; + for (let j = 0; j < num_bins; j++) { + // use power = X[j]^2 + sum += this.c_fc[i][j] * (X[j] * X[j]); + } + chromaVec[i] = sum; + } + + // CENS post-processing steps: + // Step 1) Normalize by L1 norm (sum of absolute values) + let L1 = 0; + for (let i = 0; i < 12; i++) { + L1 += Math.abs(chromaVec[i]); + } + if (L1 === 0) { + // if all zeros, set each to 1 (to avoid division by zero) + chromaVec.fill(1); + L1 = 12; + } + for (let i = 0; i < 12; i++) { + chromaVec[i] /= L1; + } + + // Step 2) Quantize according to a logarithmic scheme (resulting values 0–4) + const quantized: number[] = new Array(12).fill(0); + const values = [1, 2, 3, 4]; + const thresholds = [0.05, 0.1, 0.2, 0.4, 1.0]; + for (let idx = 0; idx < values.length; idx++) { + const v = values[idx]; + const lower = thresholds[idx]; + const upper = thresholds[idx + 1]; + for (let i = 0; i < 12; i++) { + if (chromaVec[i] > lower && chromaVec[i] <= upper) { + quantized[i] = v; + } + } + } + // Any chroma value <= 0.05 remains 0 in quantized (above loop doesn't set it) + + // Step 3) (Optional smoothing step would be here - omitted as in Python code) + + // Step 4) Normalize by L2 norm + let L2 = 0; + for (let i = 0; i < 12; i++) { + L2 += quantized[i] * quantized[i]; + } + L2 = Math.sqrt(L2); + if (L2 === 0) { + // Defensive guard: for finite input the step-1 values sum to 1, so at least one is >= 1/12 (> 0.05) and quantizes to >= 1. + // Only NaN/Infinity samples can leave every entry at 0; in that case fall back to all ones with L2 = sqrt(12) + quantized.fill(1); + L2 = Math.sqrt(12); + } + const chromaNorm: number[] = new Array(12); + for (let i = 0; i < 12; i++) { + chromaNorm[i] = quantized[i] / L2; + } + return chromaNorm; + } +} + +/** + * Convert an entire audio signal to an np-CENS chromagram (12 x M matrix). + * Equivalent to Python function audio_to_np_cens. + * + * @param y - audio samples (mono) as Float32Array or number[]. + * @param sr - sample rate of the audio. 
+ * @param n_fft - FFT window size to use for frames. + * @param hop_len - hop length (stride) in samples between successive frames. + * @returns A 2D array of shape [12][M], where M is the number of chroma vectors (frames). + */ +function audio_to_np_cens(y: Float32Array | number[], sr: number, n_fft: number, hop_len: number): number[][] { + // Calculate number of full frames of length n_fft that fit in the signal with given hop length + const M = Math.floor((y.length - n_fft) / hop_len) + 1; + const chromagram: number[][] = Array.from({ length: 12 }, () => new Array(M).fill(0)); + const cm = new ChromaMaker(sr, n_fft); + // Process each frame + for (let m = 0; m < M; m++) { + const start = m * hop_len; + const frame = (y instanceof Float32Array) + ? y.subarray(start, start + n_fft) + : (y as number[]).slice(start, start + n_fft); + const chromaVec = cm.insert(frame); + for (let i = 0; i < 12; i++) { + chromagram[i][m] = chromaVec[i]; + } + } + return chromagram; +} + +/** + * Load an audio file and convert it to an np-CENS chromagram. + * Equivalent to Python function file_to_np_cens. + * + * @param filepath - path to the audio file (WAV format expected). + * @param params - object containing parameters: + * { sr: desired sample rate (Hz), n_fft: FFT length, ref_hop_len: hop length in samples }. + * @returns A 12 x M chromagram matrix as a 2D array of numbers. 
/**
 * Load an audio file and convert it to an np-CENS chromagram.
 * Equivalent to Python function file_to_np_cens.
 *
 * Steps: read and decode the WAV file, mix all channels down to mono by
 * averaging, resample to params.sr when the file's rate differs, then pass
 * the samples to audio_to_np_cens.
 *
 * @param filepath - path to the audio file (WAV format expected).
 * @param params - object containing parameters:
 *   { sr: desired sample rate (Hz), n_fft: FFT length, ref_hop_len: hop length in samples }.
 * @returns A 12 x M chromagram matrix as a 2D array of numbers.
 */
function file_to_np_cens(filepath: string, params: { sr: number; n_fft: number; ref_hop_len: number; }): number[][] {
  // Read the file from disk and decode it into per-channel sample arrays.
  const decoded = wav.decode(fs.readFileSync(filepath));
  const channels = decoded.channelData;
  const channelCount = channels.length;

  // A single channel is used as-is; several channels are averaged sample by sample.
  let mono: Float32Array = channels[0];
  if (channelCount > 1) {
    const sampleCount = channels[0].length;
    const mixed = new Float32Array(sampleCount);
    for (let n = 0; n < sampleCount; n++) {
      let total = 0;
      for (let c = 0; c < channelCount; c++) {
        total += channels[c][n];
      }
      mixed[n] = total / channelCount;
    }
    mono = mixed;
  }

  const sourceRate = decoded.sampleRate;
  const { sr: targetRate, n_fft, ref_hop_len } = params;

  // Already at the requested rate: compute the chromagram directly.
  if (sourceRate === targetRate) {
    return audio_to_np_cens(mono, targetRate, n_fft, ref_hop_len);
  }

  // Resample with wave-resampler; its result is normalised to a Float32Array
  // in case the library hands back a different array type.
  const converted: Float32Array | number[] = waveResampler.resample(mono, sourceRate, targetRate);
  const samples = converted instanceof Float32Array ? converted : Float32Array.from(converted);
  return audio_to_np_cens(samples, targetRate, n_fft, ref_hop_len);
}

// Export functions and class for external use
export { pitch_freqs, spec_to_pitch_mtx, ChromaMaker, audio_to_np_cens, file_to_np_cens };

// ---------------------------------------------------------------------------
// Patch boundary: everything below belongs to a different (new) file.
// diff --git a/frontend/companion-app/utils/mic-processor.js b/frontend/companion-app/utils/mic-processor.js
// new file mode 100644
// index 00000000..5c1379f9
// --- /dev/null
// +++ b/frontend/companion-app/utils/mic-processor.js
// @@ -0,0 +1,37 @@
// ---------------------------------------------------------------------------
/* eslint-env es2021, browser */
/* global AudioWorkletProcessor, registerProcessor */

/**
 * Audio worklet processor that captures live microphone audio and forwards it
 * to the main thread (App.tsx) in fixed chunks of 4096 samples, the same size
 * as the FFT window App.tsx passes to ChromaMaker.
 */
class MicProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    // Accumulates incoming samples until a full 4096-sample chunk is ready.
    this._buffer = new Float32Array(4096);
    // Write position inside _buffer, i.e. how many samples are stored so far.
    this._bufIndex = 0;
  }

  /**
   * Called repeatedly by the audio engine with the next block of input audio
   * (typically 128 samples per call). Only the first channel of the first
   * input is read; nothing is written to `outputs`.
   *
   * @returns true, so the processor keeps running.
   */
  process(inputs, outputs) {
    const input = inputs[0];

    // No usable input on this call: keep the processor alive and wait.
    if (!input || !input[0]) {
      return true;
    }

    for (const sample of input[0]) {
      this._buffer[this._bufIndex++] = sample;

      // Buffer full: post a copy (the buffer itself is reused) and start over.
      if (this._bufIndex >= this._buffer.length) {
        this.port.postMessage(this._buffer.slice(0));
        this._bufIndex = 0;
      }
    }

    return true;
  }
}

// Register this processor under the name 'mic-processor' so App.tsx can load it via addModule()
registerProcessor('mic-processor', MicProcessor);