floppy: apply NCO-based 2nd-order PLL to WDATA line

mx-shift · mx-shift · commit 473e8f6895bb · 2024-01-20T15:40:31.000-08:00
Centurion Finch Floppy Controller (FFC, https://github.com/Nakazoto/CenturionComputer/wiki/FFC-Board) is an 8" floppy controller that applies up to 350ns write precompensation. FlashFloppy's existing approach of re-aligning the bit clock to the last pulse edge causes enough additional error for bitcells to be missed or injected in this case. This commit applies a 2nd-order (proportional and integral) PLL to the incoming WDATA to mitigate both frequency offset and extreme write precompensation. The implementation is based on a virtual numerically-controlled oscillator with a frequency 2^16 times FlashFloppy's tick rate. Proportional and integral constants were chosen to nominally provide a 715kHz natural loop frequency and a damping factor of 1 and then adjusted to be powers of 2 to allow computation to use bit shifts instead of multiplication and division. This commit has been successfully tested with a variety of PC-compatible floppy controllers as well as Centurion FFC.
diff --git a/src/floppy_generic.c b/src/floppy_generic.c
@@ -10,6 +10,13 @@
  * See the file COPYING for more details, or visit <http://unlicense.org>.
  */
 
+/*
+ * PLL oscillator runs at 2^16 times the tick rate to allow for lots of
+ * precision in phase error calcuations.  PLL_PHASE_STEP is the initial PLL
+ * period which sets the PLL frequency equal to an ideal tick.
+ */
+#define PLL_PHASE_STEP (1 << 16)
+
 /* A DMA buffer for running a timer associated with a floppy-data I/O pin. */
 struct dma_ring {
     /* Current state of DMA (RDATA): 
@@ -33,7 +40,12 @@ struct dma_ring {
     uint16_t cons;
     union {
         uint16_t prod; /* dma_rd: our producer index for flux samples */
-        uint16_t prev_sample; /* dma_wr: previous CCRx sample value */
+        struct {
+            uint32_t phase_step;    /* PLL period */
+            int32_t phase_integral; /* PLL's running sum of phase errors */
+            uint32_t prev_bc_left;  /* PLL timestamp of left edge of previous bitcell */
+            uint32_t curr_bc_left;  /* PLL timestamp of left edge of current bitcell */
+        } wr;
     };
     /* DMA ring buffer of timer values (ARR or CCRx). */
     uint16_t buf[1024];
@@ -401,6 +413,10 @@ static void wdata_start(void)
     }
 
     dma_wr->state = DMA_starting;
+    dma_wr->wr.phase_step = PLL_PHASE_STEP;
+    dma_wr->wr.phase_integral = 0;
+    dma_wr->wr.prev_bc_left = 0;
+    dma_wr->wr.curr_bc_left = 0;
 
     /* Start timer. */
     tim_wdata->egr = TIM_EGR_UG;
@@ -643,13 +659,15 @@ static void IRQ_rdata_dma(void)
 static void IRQ_wdata_dma(void)
 {
     const uint16_t buf_mask = ARRAY_SIZE(dma_rd->buf) - 1;
-    uint16_t cons, prod, prev, next;
+    uint16_t cons, prod;
     uint32_t bc_dat = 0, bc_prod;
     uint32_t *bc_buf = image->bufs.write_bc.p;
     unsigned int sync = image->sync;
     unsigned int bc_bufmask = (image->bufs.write_bc.len / 4) - 1;
-    int curr, cell = image->write_bc_ticks;
     struct write *write = NULL;
+    uint32_t next_edge, distance_from_prev_bc_left, distance_from_curr_bc_left;
+    uint32_t bc_step;
+    int32_t phase_error, phase_proportional;
 
     /* Clear DMA peripheral interrupts. */
     dma1->ifcr = DMA_IFCR_CGIF(dma_wdata_ch);
@@ -669,27 +687,56 @@ static void IRQ_wdata_dma(void)
     }
 
     /* Process the flux timings into the raw bitcell buffer. */
-    prev = dma_wr->prev_sample;
     bc_prod = image->bufs.write_bc.prod;
     bc_dat = image->write_bc_window;
     for (cons = dma_wr->cons; cons != prod; cons = (cons+1) & buf_mask) {
-        next = dma_wr->buf[cons];
-        curr = (int16_t)(next - prev) - (cell >> 1);
-        if (unlikely(curr < 0)) {
-            /* Runt flux, much shorter than bitcell clock. Merge it forward. */
+        /* Calculate duration of a bitcell using the PLL's current period */
+        bc_step = dma_wr->wr.phase_step * (uint32_t)image->write_bc_ticks;
+
+        /*
+         * Incoming sample ticks are shifted up 16-bits to match PLL's internal
+         * precision
+         */
+        next_edge = (uint32_t)dma_wr->buf[cons] << 16;
+
+        /*
+         * If this is the first pulse after WGATE was asserted, treat it as
+         * perfectly in phase.
+         */
+        if (dma_wr->wr.prev_bc_left == 0 && dma_wr->wr.curr_bc_left == 0) {
+            dma_wr->wr.curr_bc_left = next_edge - (bc_step / 2);
+            dma_wr->wr.prev_bc_left = dma_wr->wr.curr_bc_left - bc_step;
+        }
+
+        /* By computing distance, wraparound is accounted for naturally. */
+        distance_from_prev_bc_left = next_edge - dma_wr->wr.prev_bc_left;
+
+        /* If the next edge would fall within the previous bitcell, ignore it. */
+        if (distance_from_prev_bc_left < (dma_wr->wr.curr_bc_left - dma_wr->wr.prev_bc_left))
+        {
             continue;
         }
-        prev = next;
-        while ((curr -= cell) > 0) {
+
+        /* Advance to the current bitcell */
+        distance_from_curr_bc_left = next_edge - dma_wr->wr.curr_bc_left;
+
+        /* Record zeros for each bitcell that passed before this pulse */
+        while (distance_from_curr_bc_left > bc_step)
+        {
             bc_dat <<= 1;
             bc_prod++;
-            if (!(bc_prod&31))
-                bc_buf[((bc_prod-1) / 32) & bc_bufmask] = htobe32(bc_dat);
+
+            if (!(bc_prod & 31))
+                bc_buf[((bc_prod - 1) / 32) & bc_bufmask] = htobe32(bc_dat);
+
+            distance_from_curr_bc_left -= bc_step;
+            dma_wr->wr.curr_bc_left += bc_step;
         }
-        curr += cell >> 1; /* remove the 1/2-cell bias */
-        prev -= curr >> 2; /* de-jitter/precomp: carry 1/4 of phase error */
+
+        /* Record a one for this bitcell */
         bc_dat = (bc_dat << 1) | 1;
         bc_prod++;
+
         switch (sync) {
         case SYNC_fm:
             /* FM clock sync clock byte is 0xc7. Check for:
@@ -704,6 +751,49 @@ static void IRQ_wdata_dma(void)
         }
         if (!(bc_prod&31))
             bc_buf[((bc_prod-1) / 32) & bc_bufmask] = htobe32(bc_dat);
+
+        /*
+         * Figure out the phase error of the current sample before we start
+         * mucking with state
+         */
+        phase_error = ((int32_t)distance_from_curr_bc_left - ((int32_t)bc_step / 2)) / (int32_t)image->write_bc_ticks;
+
+        /* Adjust bitcell history for next iteration */
+        dma_wr->wr.prev_bc_left = dma_wr->wr.curr_bc_left;
+        dma_wr->wr.curr_bc_left += bc_step;
+
+        /*
+         * Calculate P and I terms from the measured phase error.
+         * 
+         * PLL analysis based on https://www.dsprelated.com/showarticle/967.php
+         *   zeta = loop dampening coefficient
+         *   f_n = loop natural frequency
+         *   f_s = NCO base frequency
+         *   k_p = phase detector gain
+         *   k_nco = NCO feedback scaler
+         *   k_l = proportional scaler
+         *   k_i = integral scaler
+         *
+         *   w_n = f_n * 2 * pi
+         *   Ts = 1 / f_s
+         *   k_l = 2 * zeta * w_n * Ts / (k_p * k_nco)
+         *   k_i = w_n^2 * Ts^2 / (k_p * k_nco)
+         *
+         *   Assumptions:
+         *   zeta = 1
+         *   f_s = 72,000,000 Hz
+         *   k_p = 1.0
+         *   k_nco = 1.0
+         *
+         *   Results:
+         *   f_n = 715,000 Hz
+         *   k_l = 0.125 = 1/8
+         *   k_i = 0.003893 ~= 1/256
+         */
+        dma_wr->wr.phase_integral += phase_error / 256;
+        phase_proportional = phase_error / 8;
+
+        dma_wr->wr.phase_step = (uint32_t)((int32_t)(1 << 16) + dma_wr->wr.phase_integral + phase_proportional);
     }
 
     if (bc_prod & 31)
@@ -718,14 +808,12 @@ static void IRQ_wdata_dma(void)
         /* Initialise decoder state for the start of the next write. */
         bc_prod = (bc_prod + 31) & ~31;
         bc_dat = ~0;
-        prev = 0;
     }
 
     /* Save our progress for next time. */
     image->write_bc_window = bc_dat;
     image->bufs.write_bc.prod = bc_prod;
     dma_wr->cons = cons;
-    dma_wr->prev_sample = prev;
 }
 
 /*