Purdue-Artificial-Intelligence-in-Music · rtjord · Mar 5, 2025 · Feb 12, 2025 · Feb 12, 2025 · Feb 12, 2025
diff --git a/backend/src/otw.py b/backend/src/otw.py
@@ -162,9 +162,13 @@ def _get_best_step(self) -> Tuple[str, Tuple[int, int]]:
         if self.accumulated_cost[best_j, self.live_index] < self.accumulated_cost[self.ref_index, best_t]:
             best_t = self.live_index
             step = "live"
-        else:  # Otherwise, move in the reference sequence
+        elif self.accumulated_cost[best_j, self.live_index] > self.accumulated_cost[self.ref_index, best_t]:  # Strictly lower cost at [ref_index, best_t]: move in the reference sequence
             best_j = self.ref_index
             step = "ref"
+        else:
+            best_t = self.live_index
+            best_j = self.ref_index
+            step = "both"
 
         # If the best step is to move in both sequences, choose the one with the lowest cost
         if best_t == self.live_index and best_j == self.ref_index:
@@ -232,5 +236,4 @@ def _update_accumulated_cost(self, ref_index: int, live_index: int):
         if live_index > 0:
             steps.append(
                 self.accumulated_cost[ref_index, live_index - 1] + cost)
-
         self.accumulated_cost[ref_index, live_index] = min(steps)
diff --git a/backend/src/score_follower.py b/backend/src/score_follower.py
@@ -104,7 +104,7 @@ def step(self, frames: np.ndarray) -> float:
         self.path.append((ref_index, self.otw.live_index))
 
         # Return timestamp in the reference audio in seconds
-        return ref_index * self.win_length / self.sample_rate
+        return (ref_index+1) * self.win_length / self.sample_rate 
 
     def get_backwards_path(self, b):
         cost_matrix = self.otw.accumulated_cost
@@ -140,24 +140,26 @@ def get_path_difference(self, back_path):
     import matplotlib.pyplot as plt
     import os
     import librosa
-    reference = os.path.join('data', 'audio', 'bach', 'synthesized', 'solo.wav')
-    source = os.path.join('data', 'audio', 'bach', 'synthesized', 'solo.wav')
+    reference = os.path.join('data', 'audio', 'twinkle_twinkle', '200bpm', 'instrument_0.wav')
+    source = os.path.join('data', 'audio', 'twinkle_twinkle', '200bpm', 'instrument_0.wav')
 
     source_audio = librosa.load(source, sr=44100)
     source_audio = source_audio[0].reshape((1, -1))
 
     score_follower = ScoreFollower(reference=reference,
-                                   c=10,
+                                   c=50,
                                    max_run_count=3,
                                    diag_weight=0.5,
                                    sample_rate=44100,
-                                   win_length=8192)
+                                   win_length=4096)
 
-    for i in range(0, source_audio.shape[-1], 8192):
-        frames = source_audio[:, i:i+8192]
+    for i in range(0, source_audio.shape[-1], 4096):
+        frames = source_audio[:, i:i+4096]
         estimated_time = score_follower.step(frames)
+        # print(
+        #     f'Live index: {score_follower.otw.live_index}, Ref index: {score_follower.otw.ref_index}')
         print(
-            f'Live index: {score_follower.otw.live_index}, Ref index: {score_follower.otw.ref_index}')
+            f'Estimated time: {estimated_time}, Ref index: {score_follower.otw.ref_index * score_follower.win_length / score_follower.sample_rate}')
 
 
     print(score_follower.path)