Skip to content

Commit 04e25a2

Browse files
drm/rp1: DPI interlace: Improve precision of PIO-generated VSYNC
Instead of trying to minimize the delay between seeing HSYNC edge and asserting VSYNC, try to predict the next HSYNC edge precisely. This eliminates the round-trip delay but introduces mode-dependent rounding error. HSYNC->VSYNC lag reduced from ~30ns to -5ns..+10ns (plus up to 5ns synchronization jitter as before). This may benefit e.g. SCART HATs, particularly those that generate Composite Sync using an XNOR gate. Signed-off-by: Nick Hollinghurst <[email protected]>
1 parent 5985ce3 commit 04e25a2

File tree

1 file changed

+67
-51
lines changed

1 file changed

+67
-51
lines changed

drivers/gpu/drm/rp1/rp1-dpi/rp1_dpi_pio.c

Lines changed: 67 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,31 @@
2929
#include <linux/pio_rp1.h>
3030

3131
/*
32-
* Start a PIO SM to generate an interrupt just after HSYNC onset, then another
33-
* after a fixed delay (during which we assume HSYNC will have been deasserted).
32+
* Start a PIO SM to generate two interrupts for each horizontal line.
33+
* The first occurs shortly before the middle of the line. The second
34+
* is timed such that after receiving the IRQ plus 1 extra delay cycle,
35+
* another SM's output will align with the next HSYNC within -5ns .. +10ns.
36+
* To achieve this, we need an accurate measure of (cycles per line) / 2.
37+
*
38+
* Measured GPIO -> { wait gpio ; irq set | irq wait ; sideset } -> GPIO
39+
* round-trip delay is about 8 cycles when pins are not heavily loaded.
40+
*
41+
* PIO code ; Notional time % 1000-cycle period
42+
* -------- ; ---------------------------------
43+
* 0: wait 1 gpio 3 ; 0.. 8
44+
* 1: mov x, y ; 8.. 9
45+
* 2: jmp x--, 2 ; 9..499 (Y should be T/2 - 11)
46+
* 3: irq set 1 ; 499..500
47+
* 4: mov x, y [8] ; 500..509
48+
* 5: jmp x--, 5 ; 509..999
49+
* 6: irq set 1 ; 999..1000
3450
*/
3551

3652
static int rp1dpi_pio_start_timer_both(struct rp1_dpi *dpi, u32 flags, u32 tc)
3753
{
38-
static const u16 instructions[2][5] = {
39-
{ 0xa022, 0x2083, 0xc001, 0x0043, 0xc001 }, /* posedge */
40-
{ 0xa022, 0x2003, 0xc001, 0x0043, 0xc001 }, /* negedge */
54+
static const u16 instructions[2][7] = {
55+
{ 0x2083, 0xa022, 0x0042, 0xc001, 0xa822, 0x0045, 0xc001 }, /* +H */
56+
{ 0x2003, 0xa022, 0x0042, 0xc001, 0xa822, 0x0045, 0xc001 }, /* -H */
4157
};
4258
const struct pio_program prog = {
4359
.instructions = instructions[(flags & DRM_MODE_FLAG_NHSYNC) ? 1 : 0],
@@ -51,16 +67,18 @@ static int rp1dpi_pio_start_timer_both(struct rp1_dpi *dpi, u32 flags, u32 tc)
5167
return -EBUSY;
5268

5369
offset = pio_add_program(dpi->pio, &prog);
54-
if (offset == PIO_ORIGIN_ANY)
70+
if (offset == PIO_ORIGIN_ANY) {
71+
pio_sm_unclaim(dpi->pio, sm);
5572
return -EBUSY;
73+
}
5674

5775
pio_sm_config cfg = pio_get_default_sm_config();
5876

5977
pio_sm_set_enabled(dpi->pio, sm, false);
60-
sm_config_set_wrap(&cfg, offset, offset + 4);
78+
sm_config_set_wrap(&cfg, offset, offset + 6);
6179
pio_sm_init(dpi->pio, sm, offset, &cfg);
6280

63-
pio_sm_put(dpi->pio, sm, tc - 4);
81+
pio_sm_put(dpi->pio, sm, tc - 11);
6482
pio_sm_exec(dpi->pio, sm, pio_encode_pull(false, false));
6583
pio_sm_exec(dpi->pio, sm, pio_encode_out(pio_y, 32));
6684
pio_sm_set_enabled(dpi->pio, sm, true);
@@ -74,46 +92,36 @@ static int rp1dpi_pio_start_timer_both(struct rp1_dpi *dpi, u32 flags, u32 tc)
7492
* suitable moment (which should be an odd number of half-lines since the
7593
* last active line), sample DE again to detect field phase.
7694
*
77-
* This version assumes VFP length is within 2..129 half-lines for any field
95+
* This version assumes VFP length is within 2..256 half-lines for any field
7896
* (one half-line delay is needed to sample DE; we always wait for the next
79-
* half-line boundary to improve VSync start accuracy).
97+
* half-line boundary to improve VSync start accuracy) and VBP in 1..255.
8098
*/
8199

82100
static int rp1dpi_pio_vsync_ilace(struct rp1_dpi *dpi,
83101
struct drm_display_mode const *mode)
84102
{
85-
static const int wrap_target = 14;
86-
static const int wrap = 26;
87103
u16 instructions[] = { /* This is mutable */
104+
// .wrap_target
88105
0xa0e6, // 0: mov osr, isr side 0 ; top: rewind parameters
89106
0x2081, // 1: wait 1 gpio, 1 side 0 ; main: while (!DE) wait;
90107
0x2783, // 2: wait 1 gpio, 3 side 0 [7] ; do { @HSync
91108
0xc041, // 3: irq clear 1 side 0 ; flush stale IRQs
92109
0x20c1, // 4: wait 1 irq, 1 side 0 ; @midline
93-
0x00c1, // 5: jmp pin, 1 side 0 ; } while (DE)
110+
0x00c2, // 5: jmp pin, 2 side 0 ; } while (DE)
94111
0x0007, // 6: jmp 7 side 0 ; <modify for -DE fixup>
95-
0x6027, // 7: out x, 7 side 0 ; x = VFPlen - 2
96-
0x000a, // 8: jmp 10 side 0 ; while (x--) {
97-
0x20c1, // 9: wait 1 irq, 1 side 0 ; @halfline
98-
0x0049, // 10: jmp x--, 9 side 0 ; }
99-
0x6021, // 11: out x, 1 side 0 ; test for aligned case
100-
0x003a, // 12: jmp !x, 26 side 0 ; if (!x) goto precise;
101-
0x20c1, // 13: wait 1 irq, 1 side 0 ; @halfline
102-
// .wrap_target ; vsjoin:
103-
0xb722, // 14: mov x, y side 1 [7] ; VSYNC=1; x = VSyncLen
104-
0xd041, // 15: irq clear 1 side 1 ; VSYNC=1; flush stale IRQs
105-
0x30c1, // 16: wait 1 irq, 1 side 1 ; VSYNC=1; do { @halfline
106-
0x1050, // 17: jmp x--, 16 side 1 ; VSYNC=1; } while (x--)
107-
0x6028, // 18: out x, 8 side 0 ; VSYNC=0; x = VBPLen
108-
0x0015, // 19: jmp 21 side 0 ; while (x--) {
109-
0x20c1, // 20: wait 1 irq, 1 side 0 ; @halfline
110-
0x0054, // 21: jmp x--, 20 side 0 ; }
111-
0x00c0, // 22: jmp pin, 0 side 0 ; if (DE) reset phase
112-
0x0018, // 23: jmp 24 side 0 ; <modify for -DE fixup>
113-
0x00e1, // 24: jmp !osre, 1 side 0 ; if (!phase) goto main
114-
0x0000, // 25: jmp 0 side 0 ; goto top
115-
0x2083, // 26: wait 1 gpio, 3 side 0 ; precise: @HSync
116-
// .wrap ; goto vsjoin
112+
0x6028, // 7: out x, 8 side 0 ; x = VFPlen - 2
113+
0x20c1, // 8: wait 1 irq, 1 side 0 ; do { @halfline
114+
0x0048, // 9: jmp x--, 8 side 0 ; } while (x--)
115+
0xb022, // 10: mov x, y side 1 ; VSYNC=1; x = VSyncLen
116+
0x30c1, // 11: wait 1 irq, 1 side 1 ; VSYNC=1; do { @halfline
117+
0x104b, // 12: jmp x--, 11 side 1 ; VSYNC=1; } while (x--)
118+
0x6028, // 13: out x, 8 side 0 ; VSYNC=0; x = VBPLen - 1
119+
0x20c1, // 14: wait 1 irq, 1 side 0 ; do { @halfline
120+
0x004e, // 15: jmp x--, 14 side 0 ; } while (x--)
121+
0x00c0, // 16: jmp pin, 0 side 0 ; if (DE) reset phase
122+
0x0012, // 17: jmp 18 side 0 ; <modify for -DE fixup>
123+
0x00e1, // 18: jmp !osre, 1 side 0 ; if (!phase) goto main
124+
// .wrap ; goto top
117125
};
118126
struct pio_program prog = {
119127
.instructions = instructions,
@@ -129,8 +137,16 @@ static int rp1dpi_pio_vsync_ilace(struct rp1_dpi *dpi,
129137
if (sm < 0)
130138
return -EBUSY;
131139

132-
/* Compute mid-line time constant and start the timer SM */
133-
tc = (mode->htotal * (u64)sysclk) / (u64)(2000u * mode->clock);
140+
/*
141+
* Compute half-line time constant (round uppish so that VSync should
142+
* switch never > 5ns before DPICLK, while defeating roundoff errors)
143+
* and start the timer SM.
144+
*/
145+
tc = (u32)clk_get_rate(dpi->clocks[RP1DPI_CLK_DPI]);
146+
if (!tc)
147+
tc = 1000u * mode->clock;
148+
tc = ((u64)mode->htotal * (u64)sysclk + ((7ul * tc) >> 2)) /
149+
(u64)(2ul * tc);
134150
if (rp1dpi_pio_start_timer_both(dpi, mode->flags, tc) < 0) {
135151
pio_sm_unclaim(dpi->pio, sm);
136152
return -EBUSY;
@@ -141,23 +157,23 @@ static int rp1dpi_pio_vsync_ilace(struct rp1_dpi *dpi,
141157
if (dpi->de_inv) {
142158
instructions[1] ^= 0x0080;
143159
instructions[5] = 0x00c7;
144-
instructions[6] = 0x0001;
145-
instructions[22] = 0x00d8;
146-
instructions[23] = 0x0000;
160+
instructions[6] = 0x0002;
161+
instructions[16] = 0x00d2;
162+
instructions[17] = 0x0000;
147163
}
148-
for (i = 0; i < ARRAY_SIZE(instructions); i++) {
149-
if (mode->flags & DRM_MODE_FLAG_NVSYNC)
164+
if (mode->flags & DRM_MODE_FLAG_NHSYNC)
165+
instructions[2] ^= 0x0080;
166+
if (mode->flags & DRM_MODE_FLAG_NVSYNC) {
167+
for (i = 0; i < ARRAY_SIZE(instructions); i++)
150168
instructions[i] ^= 0x1000;
151-
if ((mode->flags & DRM_MODE_FLAG_NHSYNC) && (instructions[i] & 0xe07f) == 0x2003)
152-
instructions[i] ^= 0x0080;
153169
}
154170
offset = pio_add_program(dpi->pio, &prog);
155171
if (offset == PIO_ORIGIN_ANY)
156172
return -EBUSY;
157173

158174
/* Configure pins and SM */
159175
dpi->pio_stole_gpio2 = true;
160-
sm_config_set_wrap(&cfg, offset + wrap_target, offset + wrap);
176+
sm_config_set_wrap(&cfg, offset, offset + ARRAY_SIZE(instructions) - 1);
161177
sm_config_set_sideset(&cfg, 1, false, false);
162178
sm_config_set_sideset_pins(&cfg, 2);
163179
pio_gpio_init(dpi->pio, 2);
@@ -168,17 +184,17 @@ static int rp1dpi_pio_vsync_ilace(struct rp1_dpi *dpi,
168184
/* Compute vertical times, remembering how we rounded vdisplay, vtotal */
169185
vfp = mode->vsync_start - (mode->vdisplay & ~1);
170186
vbp = (mode->vtotal | 1) - mode->vsync_end;
171-
if (vfp > 128) {
172-
vbp += vfp - 128;
173-
vfp = 128;
187+
if (vfp > 256) {
188+
vbp += vfp - 256;
189+
vfp = 256;
174190
} else if (vfp < 3) {
175-
vbp = (vbp > 3 - vfp) ? (vbp - 3 + vfp) : 0;
191+
vbp = (vbp > 3 - vfp) ? (vbp - 3 + vfp) : 1;
176192
vfp = 3;
177193
}
178194

179195
pio_sm_put(dpi->pio, sm,
180-
(vfp - 2) + ((vfp & 1) << 7) + (vbp << 8) +
181-
((vfp - 3) << 16) + (((~vfp) & 1) << 23) + ((vbp + 1) << 24));
196+
(vfp - 2) + ((vbp - 1) << 8) +
197+
((vfp - 3) << 16) + (vbp << 24));
182198
pio_sm_put(dpi->pio, sm, mode->vsync_end - mode->vsync_start - 1);
183199
pio_sm_exec(dpi->pio, sm, pio_encode_pull(false, false));
184200
pio_sm_exec(dpi->pio, sm, pio_encode_out(pio_y, 32));

0 commit comments

Comments
 (0)