Companion-code/backend/main.py at 8319d6ac2de7ff10e39e5ed1a92fe7fe322af4a9 · Purdue-Artificial-Intelligence-in-Music/Companion-code · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import numpy as np
from src.alignment_error import load_data, calculate_alignment_error
from src.accompaniment_error import calculate_accompaniment_error
import os
import librosa
from src.audio_buffer import AudioBuffer
from src.score_follower import ScoreFollower
from src.midi_performance import MidiPerformance
from src.audio_generator import AudioGenerator
import soundfile as sf
import pyaudio
from time import time, sleep

def normalize_audio(audio: np.ndarray) -> np.ndarray:
    """Normalize audio data to the range [-1, 1]."""
    return audio / np.max(np.abs(audio))

SAMPLE_RATE = 44100  # Universal sample rate
CHANNELS = 1  # Universal number of channels
WIN_LENGTH = 4096  # Samples per window for score follower
HOP_LENGTH = 4096  # Samples per hop for score follower. This should be the same as WIN_LENGTH for now. Can add support for different values in the future
C = 50  # Search width for score follower. Higher values are more computationally expensive
MAX_RUN_COUNT = 3  # Slope constraint for score follower. 1 / MAX_RUN_COUNT <= slope <= MAX_RUN_COUNT
DIAG_WEIGHT = 0.75  # Weight for the diagonal in the cost matrix for score follower. Values less than 2 bias toward diagonal steps
MAX_DURATION = 600  # Maximum duration of audio buffer in seconds


STATED_TEMPO = 100  # Tempo at which the user plans to play in BPM
SOURCE_TEMPO = 110  # Tempo at which the user actually plays in BPM
PIECE_NAME = 'air_on_the_g_string'  # Name of piece
PROGAM_NUMBER = 42  # Program number for accompaniment instrument
SOLO_VOLUME_MULTIPLIER = 0.75

MIDI_SCORE = os.path.join('data', 'midi', PIECE_NAME + '.mid')  # Path to MIDI file
OUTPUT_DIR = os.path.join('data', 'audio', PIECE_NAME)  # Directory where synthesized audio will be saved

generator = AudioGenerator(score_path=MIDI_SCORE)  # Create an AudioGenerator instance
generator.generate_audio(output_dir=os.path.join(OUTPUT_DIR, f'{STATED_TEMPO}bpm'), tempo=STATED_TEMPO)  # Generate a WAV file for each instrument in the MIDI file
generator.generate_audio(output_dir=os.path.join(OUTPUT_DIR, f'{SOURCE_TEMPO}bpm'), tempo=SOURCE_TEMPO)  # Generate a WAV file for each instrument in the MIDI file at a different tempo

reference = os.path.join('data', 'audio', PIECE_NAME, f'{STATED_TEMPO}bpm', 'instrument_0.wav')  # Path to reference audio file
source = os.path.join('data', 'audio', PIECE_NAME, f'{SOURCE_TEMPO}bpm', 'instrument_0.wav')  # Path to soloist audio file (can optionally replace mic input)

source_audio, _ = librosa.load(source, sr=SAMPLE_RATE)  # load soloist audio
source_audio = source_audio.reshape((CHANNELS, -1))  # reshape soloist audio to 2D array
source_index = 0  # index to keep track of soloist audio
use_mic = False  # set to True to use microphone input, False to use prerecorded soloist audio

# Create an audio buffer to store the soloist audio
solo_buffer = AudioBuffer(max_duration=MAX_DURATION, sample_rate=SAMPLE_RATE, channels=1)

# Create a ScoreFollower instance to track the soloist
score_follower = ScoreFollower(reference=reference,
                               c=C,
                               max_run_count=MAX_RUN_COUNT,
                               diag_weight=DIAG_WEIGHT,
                               sample_rate=SAMPLE_RATE,
                               win_length=WIN_LENGTH)

soloist_times = []
estimated_times = []
accompanist_times = []
playback_rates = []

# PyAudio callback function
def callback(in_data, frame_count, time_info, status):
    global source_index

    if use_mic:  # If using microphone input
        data = np.frombuffer(in_data, dtype=np.float32)  # convert data to numpy array
        data = data.reshape((1, -1))  # reshape data to 2D array
    else:  # If using prerecorded soloist audio
        data = source_audio[:, source_index:source_index + frame_count]  # get audio data from source audio
        source_index += frame_count  # update source index

    solo_buffer.write(data)  # write soloist audio to buffer so it can be saved to a WAV file later

    estimated_time = score_follower.step(data)  # get estimated time in soloist audio in seconds
    soloist_times.append(source_index / SAMPLE_RATE)  # log soloist time for error analysis
    estimated_times.append(estimated_time)  # log estimated time for error analysis

    position = estimated_time  / 60 * STATED_TEMPO  # convert estimated time (seconds) to position in piece (beats)

    # Tell the MidiPerformance instance to update the score position.
    # The MidiPerformance instance will play the most recently passed note in the accompaniment.
    performance.update_score_position(position)
    sleep(0.01)  # update roughly every 10ms
    return (SOLO_VOLUME_MULTIPLIER * data, pyaudio.paContinue)  # Output the solo to the speakers. The accompaniment is already being played by the MidiPerformance instance.

# PyAudio
p = pyaudio.PyAudio()

# Open a stream with the callback function
stream = p.open(rate=SAMPLE_RATE,
                channels=CHANNELS,
                format=pyaudio.paFloat32,
                input=True,
                output=True,
                frames_per_buffer=WIN_LENGTH,
                start=False,
                stream_callback=callback)

# Create a MidiPerformance instance with a MIDI file and an initial tempo (BPM).
performance = MidiPerformance(midi_file_path=MIDI_SCORE, tempo=STATED_TEMPO, instrument_index=1, program_number=PROGAM_NUMBER)

# Wait for user input to start the performance
input('Press Enter to start the performance')

# Start the performance to start playing the accompaniment
performance.start()

# Start the stream to start recording the soloist
stream.start_stream()

try:
    # Wait for stream to finish
    while stream.is_active():
        sleep(0.01)
except KeyboardInterrupt:
    pass

# Close the stream
stream.close()

# Release PortAudio system resources
p.terminate()

# Save the soloist audio to a WAV file
solo_audio = solo_buffer.get_audio()
solo_audio = normalize_audio(solo_audio)
solo_audio = solo_audio.reshape(-1)
sf.write('solo.wav', solo_audio, SAMPLE_RATE)

# df_alignment = load_data('data\\alignments\\constant_tempo.csv')
# warping_path = np.asarray([estimated_times, soloist_times], dtype=np.float32).T
# df_alignment = calculate_alignment_error(df_alignment, warping_path)

# estimated_times = warping_path[:, 0]
# df_accompaniment = calculate_accompaniment_error(
#     df_alignment,
#     estimated_times=estimated_times,
#     accompanist_times=np.asarray(accompanist_times)
# )

# df_accompaniment.to_csv(
#     'output\\error_analysis_per_measure_constant.csv', index=False)
# print(df_accompaniment)