This repository was archived by the owner on Nov 23, 2023. It is now read-only.

Commit f7625c5

fix end prediction; schedule batch_len

1 parent: 78c2ab9

3 files changed: 32 additions, 15 deletions

examples/notepredictor/generate.scd (14 additions, 5 deletions)

@@ -20,7 +20,8 @@
 MIDIClient.init
 MIDIClient.destinations
 ~m1 = MIDIOut.newByName("IAC Driver", "Bus 1");
-~m2 = MIDIOut.newByName("IAC Driver", "IAC Bus 2");
+~m2 = MIDIOut.newByName("IAC Driver", "Bus 2");
+~m3 = MIDIOut.newByName("IAC Driver", "Bus 3");
 /*
 ~m1.noteOn(0, 60, 127)
 ~m2.noteOn(0, 60, 127)
@@ -35,7 +36,11 @@ MIDIClient.destinations
 var group = (prog-1 /8).asInteger;
 var idx = (prog-1 %8).asInteger;
 var port = switch(group)
-{ 0}{(idx<6).if{~m1}{~m2}} //piano
+{ 0}{case
+    {idx<4}{~m1} //acoustic
+    {idx<6}{~m2} //electric
+    {true}{~m3} //harpsichord
+} //piano
 { 1}{((idx<3)||(idx==5)).if{~m1}{~m2}} //chromatic perc
 { 2}{(idx<4).if{~m1}{~m2}} //organ
 { 3}{(idx<5).if{~m1}{~m2}} //guitar
@@ -69,7 +74,7 @@ MIDIClient.destinations
 }
 };
 ~release_all = {arg vel=0;
-[~m1, ~m2].do{arg port; 128.do{arg note; 16.do{arg chan; port.noteOff(chan, note, vel)}}}
+[~m1, ~m2, ~m3].do{arg port; 128.do{arg note; 16.do{arg chan; port.noteOff(chan, note, vel)}}}
 };
 )
@@ -202,6 +207,7 @@ MIDIdef.noteOn(\input_on, {
 //get a new prediction in light of current note
 b.sendMsg("/predictor/predict",
 \inst, inst, \pitch, num, \time, dt, \vel, val,
+// \fix_instrument, ~player_inst,
 \allow_start, false, \allow_end, false,
 \pitch_temp, 0.5, \rhythm_temp, 0.5, \timing_temp, 0.1,
 \min_time, ~delay, \max_time, 5
@@ -283,9 +289,12 @@ OSCdef(\return, {
 b.sendMsg("/predictor/predict",
 \inst, inst, \pitch, pitch, \time, dt_actual, \vel, vel,
 \allow_start, false, \allow_end, true,
-\instrument_temp, 1, \pitch_temp, 0.9, \rhythm_temp, 0.7, \timing_temp, 0.05,
+// \fix_instrument, ~player_inst,
+// \fix_time, 2.rand*0.1+~delay,
+\instrument_temp, 1, \pitch_temp, 0.9,
+\rhythm_temp, 1, \timing_temp, 0.05,
 // \instrument_temp, 1, \pitch_temp, 1, \rhythm_temp, 1, \timing_temp, 1,
-\min_time, ~delay,
+// \min_time, ~delay,
 \max_time, 5,

 );
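The new piano branch above splits the first General MIDI group across three IAC buses instead of two. For readers less used to SuperCollider's case syntax, here is a rough, purely illustrative Python sketch of the same routing decision (the return values "m1"/"m2"/"m3" just stand in for the three MIDIOut objects):

def piano_port(prog):
    # prog is a 1-based GM program number; group 0 covers programs 1-8
    idx = (prog - 1) % 8
    if idx < 4:
        return "m1"   # acoustic -> Bus 1
    elif idx < 6:
        return "m2"   # electric -> Bus 2
    else:
        return "m3"   # harpsichord -> Bus 3

So programs 1-4 land on Bus 1, 5-6 on Bus 2, and 7-8 on Bus 3, and ~release_all now flushes all three ports.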

notepredictor/notepredictor/model.py (10 additions, 9 deletions)

@@ -204,11 +204,11 @@ def __init__(self,
 p.weight.mul_(1e-2)
 self.end_proj.weight.mul(1e-2)

-# IDEA: instead of this, combine current embeddings (independently) with h via MLPs
-# stacked along a new final dim
-# matmul by mask, which is easier (?) to vary per batch/time
+# IDEA: instead of this, combine current embeddings (independently) with h by
+# projecting to h size, stacking with h along a new final dim,
+# matmul by n+1 x n mask, which is easier (?) to vary per batch/time
 # (compared to permute-and-cumsum)
-# then tanh, unbind and more independent MLPs -> dist params
+# then tanh, unbind and independent MLPs -> dist params
 self.xformer = ModalityTransformer(emb_size, ar_hidden, ar_heads, ar_layers)

 # persistent RNN state for inference
@@ -260,14 +260,13 @@ def forward(self, instruments, pitches, times, velocities, ends, validation=False
 t.expand(self.rnn.num_layers, x.shape[0], -1).contiguous() # 1 x batch x hidden
 for t in self.initial_state)
 h, _ = self.rnn(x, initial_state) #batch, time, hidden_size
-h = h[:,:-1] # skip last time position

 # fit all note factorizations (e.g. pitch->time->vel vs vel->time->pitch)
 # TODO: perm each batch item independently?
 # get a random ordering for note modalities:
 perm = torch.randperm(self.note_dim)
 # chunk RNN state into Transformer inputs
-hs = list(self.h_proj(h).chunk(self.note_dim+1, -1))
+hs = list(self.h_proj(h[:,:-1]).chunk(self.note_dim+1, -1)) # skip last time position
 h_ctx = hs[0]
 h_tgt = [hs[i+1] for i in perm]
 # embed ground truth values for teacher-forcing
@@ -299,10 +298,11 @@ def forward(self, instruments, pitches, times, velocities, ends, validation=False
 vel_log_probs = vel_result.pop('log_prob')

 # end prediction
-# skip the last position for convenience (so masking is the same)
-end_params = self.end_proj(h)
+# skip the first position for convenience
+# (so masking is the same for end as for note parts)
+end_params = self.end_proj(h[:,1:])
 end_logits = F.log_softmax(end_params, -1)
-end_log_probs = end_logits.gather(-1, ends[:,:-1,None])[...,0]
+end_log_probs = end_logits.gather(-1, ends[:,1:,None])[...,0]

 r = {
 'end_log_probs': end_log_probs,
@@ -553,6 +553,7 @@ def predict(self,
 pred_vel = predicted[iperm[3]]

 end_params = self.end_proj(h)
+print(end_params)
 end = D.Categorical(logits=end_params).sample()
 if not allow_end:
 end[:] = 0
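In forward(), the note heads still consume h[:, :-1], while the end head now consumes h[:, 1:] and gathers ends[:, 1:], so both produce T-1 positions and, as the new comment notes, the same masking applies to the end prediction as to the note parts. A small stand-alone shape check of that alignment, with hypothetical sizes and a plain Linear standing in for end_proj:

import torch

B, T, H = 2, 6, 8                        # hypothetical batch, time, hidden sizes
h = torch.randn(B, T, H)                 # RNN output: batch, time, hidden
ends = torch.randint(0, 2, (B, T))       # 0/1 end flag per position
end_proj = torch.nn.Linear(H, 2)         # stand-in for self.end_proj

h_notes = h[:, :-1]                                               # (B, T-1, H) context for the note heads
end_logits = torch.log_softmax(end_proj(h[:, 1:]), -1)            # (B, T-1, 2)
end_log_probs = end_logits.gather(-1, ends[:, 1:, None])[..., 0]  # (B, T-1)

assert h_notes.shape[1] == end_log_probs.shape[1]  # both T-1, so one mask fits both

At inference time, predict() still applies end_proj to the full current state h.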

notepredictor/scripts/train_notes.py (8 additions, 1 deletion)

@@ -25,6 +25,8 @@ def __init__(self,
 model = None, # dict of model constructor overrides
 batch_size = 128,
 batch_len = 64,
+batch_len_schedule = None,
+batch_len_max = 512,
 lr = 3e-4,
 adam_betas = (0.9, 0.999),
 adam_eps = 1e-08,
@@ -209,7 +211,7 @@ def validate():
 vel = batch['velocity'].to(self.device, non_blocking=True)

 self.iteration += 1
-self.exposure += self.batch_size # * self.batch_len
+self.exposure += self.batch_size * self.batch_len
 logs = {}

 ### forward+backward+optimizer step ###
@@ -232,6 +234,11 @@ def validate():

 validate()

+if self.batch_len_schedule is not None:
+    self.batch_len = min(
+        self.batch_len_max, self.batch_len+self.batch_len_schedule)
+    self.dataset.batch_len = self.batch_len
+
 self.save(self.model_dir / f'{self.epoch:04d}.ckpt')

 def deep_update(a, b):
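With batch_len_schedule left at its default of None, training behaves as before; when it is set, batch_len grows by that amount after each epoch's validation pass, capped at batch_len_max, and the dataset is updated to serve the longer sequences. The exposure counter now also scales with batch_len, so it counts events seen rather than sequences seen. A minimal sketch of the schedule's progression, using hypothetical values:

batch_len, batch_len_schedule, batch_len_max = 64, 32, 512  # hypothetical settings

lengths = []
for epoch in range(16):
    lengths.append(batch_len)  # sequence length used for this epoch's batches
    if batch_len_schedule is not None:
        batch_len = min(batch_len_max, batch_len + batch_len_schedule)

print(lengths)  # 64, 96, 128, ... rising by 32 per epoch and capping at 512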
