pitch top p; separate rhythm top p from timing temp; velocity min/max

victor-shepardson · victor-shepardson · commit a9b91f132077 · 2022-04-06T11:42:44.000Z
diff --git a/examples/notepredictor/generate.scd b/examples/notepredictor/generate.scd
@@ -1,6 +1,9 @@
 // in this example the model's predictions are fed back to it so it plays itself.
 // the player can add notes as well and start/stop/reset the model with a footswitch.
 
+// TODO: steerable generation. gui for ranges, temperatures;
+// MIDI controller for pitch set
+
 (
 ~gui = false;
 MIDIIn.connectAll;
@@ -18,26 +21,30 @@ s.boot;
 (
 SynthDef(\pluck, {
     var vel = \vel.kr;
-    var signal = Saw.ar(\freq.kr, 0.2) * EnvGate.new(1);
-    var fr = 2.pow(Decay.ar(Impulse.ar(0), 3)*6*vel+8);
+    var freq = \freq.kr;
+    var fl = freq.log2 - 1;
+    var signal = Saw.ar(freq, 0.2) * EnvGate.new(1);
+    var fr = 2.pow(Decay.ar(Impulse.ar(0), 3)*(13-fl)*vel+fl);
     signal = BLowPass.ar(signal, fr)*vel;
     Out.ar([0,1], signal);
 }).add
 )
 
-
 // measure round-trip latency
 (
 OSCdef(\return, {
     arg msg, time, addr, recvPort;
     (Process.elapsedTime - t).postln;
 }, '/prediction', nil);
 t = Process.elapsedTime;
-b.sendMsg("/predictor/predict", \pitch, 60+12.rand, \time, 0, \vel, 0);
+b.sendMsg("/predictor/predict",
+    \pitch, 60+12.rand, \time, 0, \vel, 0,
+    \pitch_temp, 0.5, \rhythm_temp, 0.5, \timing_temp, 0.1
+);
 )
 
 // set the delay for more precise timing
-~delay = 0.015;
+~delay = 0.016;
 
 // duet with the model
 // feeds the model's predictions back to it as well as player input
@@ -71,7 +78,7 @@ MIDIdef.program(\switch, {
 MIDIdef.noteOn(\input, {
     arg val, num, chan, src;
     var t2 = Process.elapsedTime;
-    var dt = t2-(t?t2); //time since last note
+    var dt = t2-(t?(t2-~delay)); //time since last note
 
     // cancel any pending predictions
     SystemClock.clear;
@@ -80,12 +87,13 @@ MIDIdef.noteOn(\input, {
     b.sendMsg("/predictor/predict",
         \pitch, num, \time, dt, \vel, val,
         \allow_start, false, \allow_end, false,
-        \time_temp, 0, \min_time, 0.1, \max_time, 5
-        // \fix_time, 9
+        \pitch_temp, 0.5, \rhythm_temp, 0.5, \timing_temp, 0.1,
+        \min_time, ~delay, \max_time, 5
+        // \fix_time, ~delay
     );
 
     // release the previous note
-    y.release(0.1);
+    y.release(0.05);
 
     // play the current note
     y = Synth(\pluck, [\freq, num.midicps, \vel, val/127]);//.release(1);
@@ -98,9 +106,9 @@ MIDIdef.noteOn(\input, {
     ~player_t = t;
 
     ~step = ~step + 1;
+    // ~step = 0;
 });
 
-
 // OSC return from python
 OSCdef(\return, {
     arg msg, time, addr, recvPort;
@@ -111,7 +119,7 @@ OSCdef(\return, {
     // time-to-next note gets 'censored' by the model
     // when over a threshold, in this case 10 seconds,
     // meaning it just predicts 10s rather than any longer time
-    var censor = dt>10.0;
+    var censor = dt>=10.0;
 
     censor.if{
         // if the predicted time is > 10 seconds, don't schedule it, just stop.
@@ -125,12 +133,13 @@ OSCdef(\return, {
                 (num==129).if{
                     // 129 is the 'stop token',  meaning 'end-of-performance'
                     // in this case don't schedule a note, and reset the model
-                    b.sendMsg("/predictor/reset");
+                    // b.sendMsg("/predictor/reset");
                     //release the last note
                     y.release(1.0);
                     // unset time so next note will have dt=0
-                    t = nil;
-                    \reset.postln
+                    // t = nil;
+                    // \reset.postln
+                    \end.postln;
                 }{
                     // cancel any pending predictions
                     // (there shouldn't be any, but might
@@ -139,9 +148,11 @@ OSCdef(\return, {
                     // feed model its own prediction as input
                     b.sendMsg("/predictor/predict",
                         \pitch, num, \time, dt_actual, \vel, val,
-                        \allow_start, false, \allow_end, false,
-                        \time_temp, 0.1, \min_time, 0.1, \max_time, 5
-                        // \fix_time, (~step%4==0).if{0.6}{0}  // tetrachords
+                        \allow_start, false, \allow_end, true,
+                        \pitch_temp, 0.7, \rhythm_temp, 0.7, \timing_temp, 0.1,
+                        \min_time, ~delay*2, \max_time, 5,
+                        \min_vel, 10
+                        // \fix_time, ((~step+1)%3==0).if{0.6}{0}  // triads
                         // \fix_time, (~step%8)*0.1 // specific rhythm
 
                     );
@@ -151,7 +162,7 @@ OSCdef(\return, {
                         y.release(1.0)
                     }{
                         // otherwise release fast to play a melody
-                        y.release(0.1)
+                        y.release(0.05)
                     };
                     // play the current note
                     y = Synth(\pluck, [
@@ -175,7 +186,15 @@ OSCdef(\return, {
 }, "/prediction", nil);
 )
 
+
+(
 // send a note manually if you don't have a MIDI controller:
+SystemClock.clear;
+y.release(0.2);
+b.sendMsg("/predictor/reset");
+{MIDIdef.all[\input].func.value(99, 60)}.defer(0.5);
+SystemClock.clear;
+)
 // b.sendMsg("/predictor/predict", \pitch, 70, \time, 0, \vel, 64);
 
 // load another model
diff --git a/iipyper/iipyper/__init__.py b/iipyper/iipyper/__init__.py
@@ -25,11 +25,9 @@ async def _run_async():
     # start OSC server
     for osc in OSC.instances:
         await osc.create_server(asyncio.get_event_loop())
-        # osc.create_client()
 
     for midi in MIDI.instances:
         asyncio.create_task(midi_coroutine(midi))
-        # asyncio.create_task(midi.get_coroutine())
 
     # start loop tasks
     if len(_loop_fns):
diff --git a/notepredictor/notepredictor/distributions.py b/notepredictor/notepredictor/distributions.py
@@ -6,6 +6,21 @@
 import torch.distributions as D
 import torch.nn.functional as F
 
+def reweight_top_p(probs, top_p):
+    """given tensor of probabilities, apply top p / "nucleus" filtering"""
+    # NOTE: this is fudged slightly, it doesn't 'interpolate' the cutoff bin
+    desc_probs, idx = probs.sort(-1, descending=True)
+    iidx = idx.argsort(-1)
+    cumprob = desc_probs.cumsum(-1)
+    # first index where cumprob >= top_p is the last index we don't zero
+    to_zero = (cumprob >= top_p).roll(1, -1)
+    to_zero[...,0] = False
+    # unsort
+    to_zero = to_zero.gather(-1, iidx)
+    weighted_probs = torch.zeros_like(probs).where(to_zero, probs)
+    return weighted_probs / weighted_probs.sum(-1, keepdim=True)
+    
+
 class CensoredMixtureLogistic(nn.Module):
     def __init__(self, n, res=1e-2, lo='-inf', hi='inf', 
             sharp_bounds=(1e-4,2e3), init=None):
@@ -113,16 +128,22 @@ def cdf(self, h, x):
 
     def cdf_components(self, loc, s, x):
         x_ = (x[...,None] - loc) * s
-        return x_.sigmoid()
+        return x_.sigmoid()        
 
-    def sample(self, h, truncate=None, shape=None, temp=None, bias=None):
+    # TODO: 'discrete_sample' method which would re-quantize and then allow
+    # e.g. nucleus sampling on the categorical distribution?
+    def sample(self, h, truncate=None, shape=None, 
+        weight_top_p=None, component_temp=None, bias=None):
         """
         Args:
             h: Tensor[...,n_params]
             truncate: Optional[Tuple[2]]. lower and upper bound for truncation.
             shape: Optional[int]. additional sample shape to be prepended to dims.
-            temp: Optional[float]. pseudo-temperature (temperature of each mixture 
-                component). default is 1. 0 would sample component location only,
+            weight_top_p: Optional[float]. top_p ("nucleus") filtering for mixture
+                weights. default is 1 (no change to distribution). 0 would sample
+                the top component (after truncation) only.
+            component_temp: Optional[float]. sampling temperature of each mixture 
+                component. default is 1. 0 would sample component location only,
                 ignoring sharpness.
             bias: applied outside of truncation but inside of clamping,
                 useful e.g. for latency correction when sampling delta-time
@@ -139,20 +160,31 @@ def sample(self, h, truncate=None, shape=None, temp=None, bias=None):
             truncate = (-np.inf, np.inf)
         truncate = torch.tensor(truncate)
 
-        if temp is None:
-            temp = 1
+        if component_temp is None:
+            component_temp = 1
 
         if bias is None:
             bias = 0
 
         log_pi, loc, s = self.get_params(h)
+        s = s/component_temp
         scale = 1/s
 
         # cdfs: [...,bound,component]
         cdfs = self.cdf_components(loc[...,None,:], s[...,None,:], truncate) 
         # prob. mass of each component witin bounds
         trunc_probs = cdfs[...,1,:] - cdfs[...,0,:] # [...,component]
         probs = log_pi.exp() * trunc_probs # reweighted mixture component probs
+        if weight_top_p is not None:
+            # reweight with top_p
+            probs = reweight_top_p(probs, weight_top_p)
+
+        ## DEBUG
+        # print(loc)
+        # print(s)
+        # print(trunc_probs)
+        # print(probs)
+        #, log_pi.exp(), trunc_probs)
 
         c = D.Categorical(probs).sample((shape,))
         # move sample dimension first
@@ -166,7 +198,7 @@ def sample(self, h, truncate=None, shape=None, temp=None, bias=None):
         u = u * (upper-lower) + lower
 
         # x = loc + scale * (u.log() - (1 - u).log())
-        x = loc + bias - scale * temp * (1/u - 1).log()
+        x = loc + bias - scale * (1/u - 1).log()
         x = x.clamp(self.lo, self.hi)
         return x[0] if unwrap else x
 
diff --git a/notepredictor/notepredictor/model.py b/notepredictor/notepredictor/model.py
@@ -8,7 +8,7 @@
 import torch.distributions as D
 
 from .rnn import GenericRNN
-from .distributions import CensoredMixtureLogistic
+from .distributions import CensoredMixtureLogistic, reweight_top_p
 
 class SineEmbedding(nn.Module):
     def __init__(self, n, w0=1e-3, interval=1.08):
@@ -211,7 +211,9 @@ def cell_state(self):
         
     def get_samplers(self, 
             pitch_topk=None, index_pitch=None, allow_start=False, allow_end=False, 
-            sweep_time=False, min_time=None, max_time=None, bias_time=None, time_temp=None):
+            pitch_top_p=None,
+            sweep_time=False, min_time=None, max_time=None, bias_time=None, time_weight_top_p=None, time_component_temp=None,
+            min_vel=None, max_vel=None):
         """
         this method converts the many arguments to `predict` into functions for
         sampling each note modality (e.g. pitch, time, velocity)
@@ -227,7 +229,10 @@ def sample_pitch(x):
             elif pitch_topk is not None:
                 return x.argsort(-1, True)[...,:pitch_topk].transpose(0,-1)
             else:
-                return D.Categorical(logits=x).sample()
+                probs = x.softmax(-1)
+                if pitch_top_p is not None:
+                    probs = reweight_top_p(probs, pitch_top_p)
+                return D.Categorical(probs).sample()
 
         def sample_time(x):
             # TODO: respect trunc_time when sweep_time is True
@@ -247,12 +252,19 @@ def sample_time(x):
                     -np.inf if min_time is None else min_time,
                     np.inf if max_time is None else max_time)
                 return self.time_dist.sample(x, 
-                    truncate=trunc, temp=time_temp, bias=bias_time)
+                    truncate=trunc, bias=bias_time,
+                    component_temp=time_component_temp, weight_top_p=time_weight_top_p)
+
+        def sample_velocity(x):
+            trunc = (
+                -np.inf if min_vel is None else min_vel,
+                np.inf if max_vel is None else max_vel)
+            return self.vel_dist.sample(x, truncate=trunc)
 
         return (
             sample_pitch, 
             sample_time,
-            lambda x: self.vel_dist.sample(x),
+            sample_velocity,
         )
 
     @property
@@ -340,12 +352,14 @@ def forward(self, pitches, times, velocities, validation=False):
                 )
         return r
     
-    # TODO: force
+    # TODO: remove pitch_topk and sweep_time?
     def predict(self, 
             pitch, time, vel, 
             fix_pitch=None, fix_time=None, fix_vel=None, 
             pitch_topk=None, index_pitch=None, allow_start=False, allow_end=False,
-            sweep_time=False, min_time=None, max_time=None, bias_time=None, time_temp=None):
+            sweep_time=False, min_time=None, max_time=None, bias_time=None, 
+            pitch_temp=None, rhythm_temp=None, timing_temp=None,
+            min_vel=None, max_vel=None):
         """
         consume the most recent note and return a prediction for the next note.
 
@@ -372,19 +386,25 @@ def predict(self,
             bias_time: add this delay to the time 
                 (after applying min/max but before clamping to 0).
                 may be useful for latency correction.
-            time_temp: if not None, apply pseudo-temperature to the time distribution.
-                i.e., scale the temperature of each mixture component.
-                this is not technically the same as changing the temperature of the whole
-                time distribution, but it can be useful if we assume each component
-                corresponds to a different rhythmic interval. then passing `time_temp=0`
-                would lead to more rhythmically steady, less random playing. 
+            pitch_temp: if not None, apply top_p sampling to pitch. 0 is
+                deterministic, 1 is 'natural' according to the model
+            rhythm_temp: if not None, apply top_p sampling to the weighting
+                of mixture components. this affects coarse rhythmic patterns; 0 is
+                deterministic, 1 is 'natural' according to the model
+            timing_temp: if not None, apply temperature sampling to the time
+                component. this affects fine timing; 0 is deterministic and precise,
+                1 is 'natural' according to the model.
+            min_vel, max_vel: if not None, truncate the velocity distribution
 
         Returns: dict of
             'pitch': int. predicted MIDI number of next note.
             'time': float. predicted time to next note.
             'velocity': float. unquantized predicted velocity of next note.
             '*_params': tensor. distrubution parameters for visualization purposes.
         """
+        if (index_pitch is not None) and (pitch_temp is not None):
+            print("warning: `index pitch` overrides `pitch_temp`")
+
         with torch.no_grad():
             pitch = torch.LongTensor([[pitch]]) # 1x1 (batch, time)
             time = torch.FloatTensor([[time]]) # 1x1 (batch, time)
@@ -409,7 +429,10 @@ def predict(self,
                 self.projections,
                 self.get_samplers(
                     pitch_topk, index_pitch, allow_start, allow_end, 
-                    sweep_time, min_time, max_time, bias_time, time_temp),
+                    pitch_temp,
+                    sweep_time, min_time, max_time, bias_time, 
+                    rhythm_temp, timing_temp,
+                    min_vel, max_vel),
                 self.embeddings,
                 ))
 
@@ -431,10 +454,11 @@ def predict(self,
             for i,(item, embed) in enumerate(zip(fix, self.embeddings)):
                 if item is None:
                     if (
-                        i==1 and (sweep_time 
-                            or (min_time is not None) or (max_time is not None)
-                            or (time_temp is not None)) or
-                        i==0 and pitch_topk
+                        i==0 and (pitch_topk or pitch_temp is not None) or
+                        i==1 and any(p is not None for p in (
+                            min_time, max_time, rhythm_temp, timing_temp)) or
+                        i==2 and any(p is not None for p in (
+                            min_vel, max_vel))
                         ):
                         cons_idx.append(i)
                     else:
@@ -449,7 +473,7 @@ def predict(self,
             iperm = np.argsort(perm) # inverse permutation back to canonical order
 
             md = ['pitch', 'time', 'vel']
-            print([md[i] for i in perm])
+            print('sampling order:', [md[i] for i in perm])
 
             # for each undetermined modality, 
             # sample a new value conditioned on alteady determined ones