Skip to content

Commit 473e8f6

Browse files
committed
floppy: apply NCO-based 2nd-order PLL to WDATA line
Centurion Finch Floppy Controller (FFC, https://github.com/Nakazoto/CenturionComputer/wiki/FFC-Board) is an 8" floppy controller that applies up to 350ns write precompensation. FlashFloppy's existing approach of re-aligning the bit clock to the last pulse edge causes enough additional error for bitcells to be missed or injected in this case. This commit applies a 2nd-order (proportional and integral) PLL to the incoming WDATA to mitigate both frequency offset and extreme write precompensation. The implementation is based on a virtual numerically-controlled oscillator with a frequency 2^16 times FlashFloppy's tick rate. Proportional and integral constants were chosen to nominally provide a 715kHz natural loop frequency and a damping factor of 1 and then adjusted to be powers of 2 to allow computation to use bit shifts instead of multiplication and division. This commit has been successfully tested with a variety of PC-compatible floppy controllers as well as Centurion FFC.
1 parent c6310b8 commit 473e8f6

File tree

1 file changed

+104
-16
lines changed

1 file changed

+104
-16
lines changed

src/floppy_generic.c

Lines changed: 104 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
* See the file COPYING for more details, or visit <http://unlicense.org>.
1111
*/
1212

13+
/*
14+
* PLL oscillator runs at 2^16 times the tick rate to allow for lots of
15+
* precision in phase error calculations. PLL_PHASE_STEP is the initial PLL
16+
* period which sets the PLL frequency equal to an ideal tick.
17+
*/
18+
#define PLL_PHASE_STEP (1 << 16)
19+
1320
/* A DMA buffer for running a timer associated with a floppy-data I/O pin. */
1421
struct dma_ring {
1522
/* Current state of DMA (RDATA):
@@ -33,7 +40,12 @@ struct dma_ring {
3340
uint16_t cons;
3441
union {
3542
uint16_t prod; /* dma_rd: our producer index for flux samples */
36-
uint16_t prev_sample; /* dma_wr: previous CCRx sample value */
43+
struct {
44+
uint32_t phase_step; /* PLL period */
45+
int32_t phase_integral; /* PLL's running sum of phase errors */
46+
uint32_t prev_bc_left; /* PLL timestamp of left edge of previous bitcell */
47+
uint32_t curr_bc_left; /* PLL timestamp of left edge of current bitcell */
48+
} wr;
3749
};
3850
/* DMA ring buffer of timer values (ARR or CCRx). */
3951
uint16_t buf[1024];
@@ -401,6 +413,10 @@ static void wdata_start(void)
401413
}
402414

403415
dma_wr->state = DMA_starting;
416+
dma_wr->wr.phase_step = PLL_PHASE_STEP;
417+
dma_wr->wr.phase_integral = 0;
418+
dma_wr->wr.prev_bc_left = 0;
419+
dma_wr->wr.curr_bc_left = 0;
404420

405421
/* Start timer. */
406422
tim_wdata->egr = TIM_EGR_UG;
@@ -643,13 +659,15 @@ static void IRQ_rdata_dma(void)
643659
static void IRQ_wdata_dma(void)
644660
{
645661
const uint16_t buf_mask = ARRAY_SIZE(dma_rd->buf) - 1;
646-
uint16_t cons, prod, prev, next;
662+
uint16_t cons, prod;
647663
uint32_t bc_dat = 0, bc_prod;
648664
uint32_t *bc_buf = image->bufs.write_bc.p;
649665
unsigned int sync = image->sync;
650666
unsigned int bc_bufmask = (image->bufs.write_bc.len / 4) - 1;
651-
int curr, cell = image->write_bc_ticks;
652667
struct write *write = NULL;
668+
uint32_t next_edge, distance_from_prev_bc_left, distance_from_curr_bc_left;
669+
uint32_t bc_step;
670+
int32_t phase_error, phase_proportional;
653671

654672
/* Clear DMA peripheral interrupts. */
655673
dma1->ifcr = DMA_IFCR_CGIF(dma_wdata_ch);
@@ -669,27 +687,56 @@ static void IRQ_wdata_dma(void)
669687
}
670688

671689
/* Process the flux timings into the raw bitcell buffer. */
672-
prev = dma_wr->prev_sample;
673690
bc_prod = image->bufs.write_bc.prod;
674691
bc_dat = image->write_bc_window;
675692
for (cons = dma_wr->cons; cons != prod; cons = (cons+1) & buf_mask) {
676-
next = dma_wr->buf[cons];
677-
curr = (int16_t)(next - prev) - (cell >> 1);
678-
if (unlikely(curr < 0)) {
679-
/* Runt flux, much shorter than bitcell clock. Merge it forward. */
693+
/* Calculate duration of a bitcell using the PLL's current period */
694+
bc_step = dma_wr->wr.phase_step * (uint32_t)image->write_bc_ticks;
695+
696+
/*
697+
* Incoming sample ticks are shifted up 16-bits to match PLL's internal
698+
* precision
699+
*/
700+
next_edge = (uint32_t)dma_wr->buf[cons] << 16;
701+
702+
/*
703+
* If this is the first pulse after WGATE was asserted, treat it as
704+
* perfectly in phase.
705+
*/
706+
if (dma_wr->wr.prev_bc_left == 0 && dma_wr->wr.curr_bc_left == 0) {
707+
dma_wr->wr.curr_bc_left = next_edge - (bc_step / 2);
708+
dma_wr->wr.prev_bc_left = dma_wr->wr.curr_bc_left - bc_step;
709+
}
710+
711+
/* By computing distance, wraparound is accounted for naturally. */
712+
distance_from_prev_bc_left = next_edge - dma_wr->wr.prev_bc_left;
713+
714+
/* If the next edge would fall within the previous bitcell, ignore it. */
715+
if (distance_from_prev_bc_left < (dma_wr->wr.curr_bc_left - dma_wr->wr.prev_bc_left))
716+
{
680717
continue;
681718
}
682-
prev = next;
683-
while ((curr -= cell) > 0) {
719+
720+
/* Advance to the current bitcell */
721+
distance_from_curr_bc_left = next_edge - dma_wr->wr.curr_bc_left;
722+
723+
/* Record zeros for each bitcell that passed before this pulse */
724+
while (distance_from_curr_bc_left > bc_step)
725+
{
684726
bc_dat <<= 1;
685727
bc_prod++;
686-
if (!(bc_prod&31))
687-
bc_buf[((bc_prod-1) / 32) & bc_bufmask] = htobe32(bc_dat);
728+
729+
if (!(bc_prod & 31))
730+
bc_buf[((bc_prod - 1) / 32) & bc_bufmask] = htobe32(bc_dat);
731+
732+
distance_from_curr_bc_left -= bc_step;
733+
dma_wr->wr.curr_bc_left += bc_step;
688734
}
689-
curr += cell >> 1; /* remove the 1/2-cell bias */
690-
prev -= curr >> 2; /* de-jitter/precomp: carry 1/4 of phase error */
735+
736+
/* Record a one for this bitcell */
691737
bc_dat = (bc_dat << 1) | 1;
692738
bc_prod++;
739+
693740
switch (sync) {
694741
case SYNC_fm:
695742
/* FM clock sync clock byte is 0xc7. Check for:
@@ -704,6 +751,49 @@ static void IRQ_wdata_dma(void)
704751
}
705752
if (!(bc_prod&31))
706753
bc_buf[((bc_prod-1) / 32) & bc_bufmask] = htobe32(bc_dat);
754+
755+
/*
756+
* Figure out the phase error of the current sample before we start
757+
* mucking with state
758+
*/
759+
phase_error = ((int32_t)distance_from_curr_bc_left - ((int32_t)bc_step / 2)) / (int32_t)image->write_bc_ticks;
760+
761+
/* Adjust bitcell history for next iteration */
762+
dma_wr->wr.prev_bc_left = dma_wr->wr.curr_bc_left;
763+
dma_wr->wr.curr_bc_left += bc_step;
764+
765+
/*
766+
* Calculate P and I terms from the measured phase error.
767+
*
768+
* PLL analysis based on https://www.dsprelated.com/showarticle/967.php
769+
* zeta = loop damping coefficient
770+
* f_n = loop natural frequency
771+
* f_s = NCO base frequency
772+
* k_p = phase detector gain
773+
* k_nco = NCO feedback scaler
774+
* k_l = proportional scaler
775+
* k_i = integral scaler
776+
*
777+
* w_n = f_n * 2 * pi
778+
* Ts = 1 / f_s
779+
* k_l = 2 * zeta * w_n * Ts / (k_p * k_nco)
780+
* k_i = w_n^2 * Ts^2 / (k_p * k_nco)
781+
*
782+
* Assumptions:
783+
* zeta = 1
784+
* f_s = 72,000,000 Hz
785+
* k_p = 1.0
786+
* k_nco = 1.0
787+
*
788+
* Results:
789+
* f_n = 715,000 Hz
790+
* k_l = 0.125 = 1/8
791+
* k_i = 0.003893 ~= 1/256
792+
*/
793+
dma_wr->wr.phase_integral += phase_error / 256;
794+
phase_proportional = phase_error / 8;
795+
796+
dma_wr->wr.phase_step = (uint32_t)((int32_t)(1 << 16) + dma_wr->wr.phase_integral + phase_proportional);
707797
}
708798

709799
if (bc_prod & 31)
@@ -718,14 +808,12 @@ static void IRQ_wdata_dma(void)
718808
/* Initialise decoder state for the start of the next write. */
719809
bc_prod = (bc_prod + 31) & ~31;
720810
bc_dat = ~0;
721-
prev = 0;
722811
}
723812

724813
/* Save our progress for next time. */
725814
image->write_bc_window = bc_dat;
726815
image->bufs.write_bc.prod = bc_prod;
727816
dma_wr->cons = cons;
728-
dma_wr->prev_sample = prev;
729817
}
730818

731819
/*

0 commit comments

Comments
 (0)