Skip to content

Commit a8ca18f

Browse files
author
marqs
committed
timing optimizations
* add extra pipeline stage on low priority mm_interconnect_2
* add buffer between 2 critical scanline calculation stages
* split OSD render calculations across multiple cycles
1 parent 74ba38f commit a8ca18f

File tree

5 files changed

+63
-39
lines changed

5 files changed

+63
-39
lines changed

ip/osd_generator/osd_generator_top.sv

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ reg [31:0] config_reg[OSD_ROW_LSEC_ENABLE_REGNUM:OSD_ROW_COLOR_REGNUM] /* synthe
5959
reg [11:0] xpos_osd_area_scaled, xpos_text_scaled;
6060
reg [10:0] ypos_osd_area_scaled, ypos_text_scaled;
6161
reg [7:0] x_ptr[2:5], y_ptr[2:5] /* synthesis ramstyle = "logic" */;
62-
reg osd_text_act_pp[2:6], osd_act_pp[3:6];
62+
reg osd_text_act_lsec_x_hit, osd_text_act_lsec_en, osd_text_act_rsec_x_hit, osd_text_act_rsec_en, osd_text_act_y_hit;
63+
reg osd_act_lsec_x_hit, osd_act_lsec_en, osd_act_rsec_x_hit, osd_act_rsec_en, osd_act_y_hit;
64+
reg osd_text_act_pp[3:6], osd_act_pp[4:6];
6365
reg [14:0] to_ctr, to_ctr_ms;
6466
reg char_px;
6567

@@ -124,21 +126,27 @@ always @(posedge vclk) begin
124126
y_ptr[pp_idx] <= y_ptr[pp_idx-1];
125127
end
126128

127-
osd_text_act_pp[2] <= render_enable &
129+
osd_text_act_lsec_x_hit <= (xpos_text_scaled < 8*CHAR_COLS);
130+
osd_text_act_lsec_en <= config_reg[OSD_ROW_LSEC_ENABLE_REGNUM][ypos_text_scaled/8];
131+
osd_text_act_rsec_x_hit <= (xpos_text_scaled >= 8*(CHAR_COLS+CHAR_SEC_SEPARATOR)) & (xpos_text_scaled < 8*(2*CHAR_COLS+CHAR_SEC_SEPARATOR));
132+
osd_text_act_rsec_en <= config_reg[OSD_ROW_RSEC_ENABLE_REGNUM][ypos_text_scaled/8];
133+
osd_text_act_y_hit <= (ypos_text_scaled < 8*CHAR_ROWS);
134+
osd_text_act_pp[3] <= render_enable &
128135
(menu_active || (to_ctr_ms > 0)) &
129-
(((xpos_text_scaled < 8*CHAR_COLS) & config_reg[OSD_ROW_LSEC_ENABLE_REGNUM][ypos_text_scaled/8]) |
130-
((xpos_text_scaled >= 8*(CHAR_COLS+CHAR_SEC_SEPARATOR)) & (xpos_text_scaled < 8*(2*CHAR_COLS+CHAR_SEC_SEPARATOR)) & config_reg[OSD_ROW_RSEC_ENABLE_REGNUM][ypos_text_scaled/8])) &
131-
(ypos_text_scaled < 8*CHAR_ROWS);
132-
for(pp_idx = 3; pp_idx <= 6; pp_idx = pp_idx+1) begin
136+
((osd_text_act_lsec_x_hit & osd_text_act_lsec_en) | (osd_text_act_rsec_x_hit & osd_text_act_rsec_en)) & osd_text_act_y_hit;
137+
for(pp_idx = 4; pp_idx <= 6; pp_idx = pp_idx+1) begin
133138
osd_text_act_pp[pp_idx] <= osd_text_act_pp[pp_idx-1];
134139
end
135140

136-
osd_act_pp[3] <= render_enable &
141+
osd_act_lsec_x_hit <= (xpos_osd_area_scaled/8 < (CHAR_COLS+1));
142+
osd_act_lsec_en <= config_reg[OSD_ROW_LSEC_ENABLE_REGNUM][(ypos_osd_area_scaled/8) ? ((ypos_osd_area_scaled/8)-1) : 0];
143+
osd_act_rsec_x_hit <= (xpos_osd_area_scaled/8 >= (CHAR_COLS+1)) & (xpos_osd_area_scaled/8 < (2*CHAR_COLS+CHAR_SEC_SEPARATOR+1));
144+
osd_act_rsec_en <= (config_reg[OSD_ROW_RSEC_ENABLE_REGNUM][(ypos_osd_area_scaled/8)-1] | config_reg[OSD_ROW_RSEC_ENABLE_REGNUM][ypos_osd_area_scaled/8]);
145+
osd_act_y_hit <= (ypos_osd_area_scaled < 8*(CHAR_ROWS+1));
146+
osd_act_pp[4] <= render_enable &
137147
(menu_active || (to_ctr_ms > 0)) &
138-
(((xpos_osd_area_scaled/8 < (CHAR_COLS+1)) & config_reg[OSD_ROW_LSEC_ENABLE_REGNUM][(ypos_osd_area_scaled/8) ? ((ypos_osd_area_scaled/8)-1) : 0]) |
139-
((xpos_osd_area_scaled/8 >= (CHAR_COLS+1)) & (xpos_osd_area_scaled/8 < (2*CHAR_COLS+CHAR_SEC_SEPARATOR+1)) & (config_reg[OSD_ROW_RSEC_ENABLE_REGNUM][(ypos_osd_area_scaled/8)-1] | config_reg[OSD_ROW_RSEC_ENABLE_REGNUM][ypos_osd_area_scaled/8]))) &
140-
(ypos_osd_area_scaled < 8*(CHAR_ROWS+1));
141-
for(pp_idx = 4; pp_idx <= 6; pp_idx = pp_idx+1) begin
148+
((osd_act_lsec_x_hit & osd_act_lsec_en) | (osd_act_rsec_x_hit & osd_act_rsec_en)) & osd_act_y_hit;
149+
for(pp_idx = 5; pp_idx <= 6; pp_idx = pp_idx+1) begin
142150
osd_act_pp[pp_idx] <= osd_act_pp[pp_idx-1];
143151
end
144152

ossc_pro.qsf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ set_global_assignment -name UNIPHY_SEQUENCER_DQS_CONFIG_ENABLE ON
629629

630630
set_global_assignment -name SMART_RECOMPILE ON
631631

632-
set_global_assignment -name SEED 2
632+
set_global_assignment -name SEED 1
633633

634634
set_global_assignment -name STRATIXIII_UPDATE_MODE REMOTE
635635

ossc_pro.sdc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ set_false_path -from {sys:sys_inst|sys_pio_0:pio_0|data_out[3] sys:sys_inst|sys_
186186
set_false_path -to sys:sys_inst|sys_pio_1:pio_2|readdata[0]
187187
set_false_path -to sys:sys_inst|sys_pio_1:pio_2|readdata[1]
188188
set_false_path -to sys:sys_inst|sys_pio_1:pio_2|readdata[2]
189-
set_false_path -setup -to [get_registers sys:sys_inst|sys_alt_vip_cl_cvo_0:alt_vip_cl_cvo_0|alt_vip_cvo_core:cvo_core|alt_vip_cvo_sync_conditioner:pixel_channel_sync_conditioner|alt_vip_common_sync_generation:sync_generation_generate.sync_generation|sof*]
189+
set_max_delay 12 -to [get_registers sys:sys_inst|sys_alt_vip_cl_cvo_0:alt_vip_cl_cvo_0|alt_vip_cvo_core:cvo_core|alt_vip_cvo_sync_conditioner:pixel_channel_sync_conditioner|alt_vip_common_sync_generation:sync_generation_generate.sync_generation|sof*]
190190

191191

192192
### JTAG Signal Constraints ###

rtl/scanconverter.v

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright (C) 2019-2023 Markus Hiienkari <mhiienka@niksula.hut.fi>
2+
// Copyright (C) 2019-2025 Markus Hiienkari <mhiienka@niksula.hut.fi>
33
//
44
// This file is part of Open Source Scan Converter project.
55
//
@@ -103,8 +103,8 @@ localparam PP_SHMASK_END = PP_SHMASK_START + PP_SHMASK_LENGTH;
103103
localparam PP_Y_CALC_START = PP_SRCSEL_END;
104104
localparam PP_Y_CALC_LENGTH = 2;
105105
localparam PP_Y_CALC_END = PP_Y_CALC_START + PP_Y_CALC_LENGTH;
106-
localparam PP_SLGEN_START = PP_Y_CALC_END;
107-
localparam PP_SLGEN_LENGTH = 5;
106+
localparam PP_SLGEN_START = PP_Y_CALC_END-1;
107+
localparam PP_SLGEN_LENGTH = 6;
108108
localparam PP_SLGEN_END = PP_SLGEN_START + PP_SLGEN_LENGTH;
109109
localparam PP_TP_START = PP_SLGEN_END;
110110
localparam PP_TP_LENGTH = 1;
@@ -182,7 +182,7 @@ reg line_id;
182182
reg ypos_pp_init, ypos_lb_repeatfirst;
183183

184184
reg sl_method;
185-
reg [3:0] sl_str;
185+
reg [3:0] sl_str, sl_str_q;
186186
reg [7:0] sl_str_thold, Y_sl_str, R_sl_str, G_sl_str, B_sl_str;
187187
wire [7:0] R_sl_mult, G_sl_mult, B_sl_mult;
188188
reg [3:0] bfi_frame_ctr;
@@ -214,6 +214,7 @@ reg [3:0] y_ctr_sl_pp[PP_PL_START:PP_SLGEN_START] /* synthesis ramstyle = "logic
214214
assign PCLK_o = PCLK_OUT_i;
215215

216216

217+
// SLGEN cycle 2
217218
lpm_mult_8x5_9 Y_sl_hybr_ref_pre_u
218219
(
219220
.clock(PCLK_OUT_i),
@@ -224,76 +225,79 @@ lpm_mult_8x5_9 Y_sl_hybr_ref_pre_u
224225
lpm_mult_8x5_9 R_sl_hybr_ref_pre_u
225226
(
226227
.clock(PCLK_OUT_i),
227-
.dataa(R_pp[PP_SLGEN_START]),
228+
.dataa(R_pp[PP_SLGEN_START+1]),
228229
.datab(SL_HYBRSTR),
229230
.result(R_sl_hybr_ref_pre)
230231
);
231232
lpm_mult_8x5_9 G_sl_hybr_ref_pre_u
232233
(
233234
.clock(PCLK_OUT_i),
234-
.dataa(G_pp[PP_SLGEN_START]),
235+
.dataa(G_pp[PP_SLGEN_START+1]),
235236
.datab(SL_HYBRSTR),
236237
.result(G_sl_hybr_ref_pre)
237238
);
238239
lpm_mult_8x5_9 B_sl_hybr_ref_pre_u
239240
(
240241
.clock(PCLK_OUT_i),
241-
.dataa(B_pp[PP_SLGEN_START]),
242+
.dataa(B_pp[PP_SLGEN_START+1]),
242243
.datab(SL_HYBRSTR),
243244
.result(B_sl_hybr_ref_pre)
244245
);
245246

247+
// SLGEN cycle 3
246248
lpm_mult_8x5_9 Y_sl_hybr_ref_u
247249
(
248250
.clock(PCLK_OUT_i),
249251
.dataa(Y_sl_hybr_ref_pre[8:1]),
250-
.datab({sl_str, 1'b0}),
252+
.datab({sl_str_q, 1'b0}),
251253
.result(Y_sl_hybr_ref)
252254
);
253255
lpm_mult_8x5_9 R_sl_hybr_ref_u
254256
(
255257
.clock(PCLK_OUT_i),
256258
.dataa(R_sl_hybr_ref_pre[8:1]),
257-
.datab({sl_str, 1'b0}),
259+
.datab({sl_str_q, 1'b0}),
258260
.result(R_sl_hybr_ref)
259261
);
260262
lpm_mult_8x5_9 G_sl_hybr_ref_u
261263
(
262264
.clock(PCLK_OUT_i),
263265
.dataa(G_sl_hybr_ref_pre[8:1]),
264-
.datab({sl_str, 1'b0}),
266+
.datab({sl_str_q, 1'b0}),
265267
.result(G_sl_hybr_ref)
266268
);
267269
lpm_mult_8x5_9 B_sl_hybr_ref_u
268270
(
269271
.clock(PCLK_OUT_i),
270272
.dataa(B_sl_hybr_ref_pre[8:1]),
271-
.datab({sl_str, 1'b0}),
273+
.datab({sl_str_q, 1'b0}),
272274
.result(B_sl_hybr_ref)
273275
);
274276

277+
// SLGEN cycle 5
275278
lpm_mult_sl R_sl_mult_u
276279
(
277280
.clock(PCLK_OUT_i),
278-
.dataa(R_pp[PP_SLGEN_START+3]),
281+
.dataa(R_pp[PP_SLGEN_START+4]),
279282
.datab(~Y_sl_str),
280283
.result(R_sl_mult)
281284
);
282285
lpm_mult_sl G_sl_mult_u
283286
(
284287
.clock(PCLK_OUT_i),
285-
.dataa(G_pp[PP_SLGEN_START+3]),
288+
.dataa(G_pp[PP_SLGEN_START+4]),
286289
.datab(~Y_sl_str),
287290
.result(G_sl_mult)
288291
);
289292
lpm_mult_sl B_sl_mult_u
290293
(
291294
.clock(PCLK_OUT_i),
292-
.dataa(B_pp[PP_SLGEN_START+3]),
295+
.dataa(B_pp[PP_SLGEN_START+4]),
293296
.datab(~Y_sl_str),
294297
.result(B_sl_mult)
295298
);
296299

300+
// SHMASK cycle 2
297301
lpm_mult_8x5_9 R_shmask_mult_u
298302
(
299303
.clock(PCLK_OUT_i),
@@ -416,7 +420,7 @@ end
416420
// | | | | SRCSEL | | | | | | | | |
417421
// | | SHM_BUF | SHM_BUF | SHMASK | SHMASK | SHMASK | | | | | | |
418422
// | | | | | Y | Y | | | | | | |
419-
// | | | | | | | SLGEN | SLGEN | SLGEN | SLGEN | SLGEN | |
423+
// | | | | | | SLGEN | SLGEN | SLGEN | SLGEN | SLGEN | SLGEN | |
420424
// | | | | | | | | | | | | TP |
421425

422426

@@ -570,7 +574,7 @@ always @(posedge PCLK_OUT_i) begin
570574
G_pp[PP_SHMASK_END] <= MISC_SHMASK_ENABLE ? (G_shmask_mult[8] ? 8'hff : G_shmask_mult[7:0]) : G_pp[PP_SHMASK_START+2];
571575
B_pp[PP_SHMASK_END] <= MISC_SHMASK_ENABLE ? (B_shmask_mult[8] ? 8'hff : B_shmask_mult[7:0]) : B_pp[PP_SHMASK_START+2];
572576

573-
/* ---------- Scanline generation (5 cycles) ---------- */
577+
/* ---------- Scanline generation (6 cycles) ---------- */
574578
if (bfi_frame_ctr > MISC_BFI_THOLD) begin
575579
sl_str <= MISC_BFI_STR;
576580
sl_method <= 1'b1;
@@ -591,24 +595,28 @@ always @(posedge PCLK_OUT_i) begin
591595
end
592596

593597
// Cycle 2
594-
sl_str_thold <= ((sl_str+8'h01)<<4)-1'b1;
598+
// register inferred in DSP block (sl_hybr_ref), avoid critical timing path due to routing delays
599+
sl_str_q <= sl_str;
595600

596601
// Cycle 3
602+
sl_str_thold <= ((sl_str_q+8'h01)<<4)-1'b1;
603+
604+
// Cycle 4
597605
Y_sl_str <= ({1'b0, sl_str_thold} < Y_sl_hybr_ref) ? 8'h0 : sl_str_thold - Y_sl_hybr_ref[7:0];
598606
R_sl_str <= ({1'b0, sl_str_thold} < R_sl_hybr_ref) ? 8'h0 : sl_str_thold - R_sl_hybr_ref[7:0];
599607
G_sl_str <= ({1'b0, sl_str_thold} < G_sl_hybr_ref) ? 8'h0 : sl_str_thold - G_sl_hybr_ref[7:0];
600608
B_sl_str <= ({1'b0, sl_str_thold} < B_sl_hybr_ref) ? 8'h0 : sl_str_thold - B_sl_hybr_ref[7:0];
601609

602-
// Cycle 4
603-
// store subtraction based scanlined RGB into pipeline registers
604-
R_pp[PP_SLGEN_START+4] <= draw_sl_pp[PP_SLGEN_START+3] ? ((R_pp[PP_SLGEN_START+3] > R_sl_str) ? (R_pp[PP_SLGEN_START+3] - R_sl_str) : 8'h00) : R_pp[PP_SLGEN_START+3];
605-
G_pp[PP_SLGEN_START+4] <= draw_sl_pp[PP_SLGEN_START+3] ? ((G_pp[PP_SLGEN_START+3] > G_sl_str) ? (G_pp[PP_SLGEN_START+3] - G_sl_str) : 8'h00) : G_pp[PP_SLGEN_START+3];
606-
B_pp[PP_SLGEN_START+4] <= draw_sl_pp[PP_SLGEN_START+3] ? ((B_pp[PP_SLGEN_START+3] > B_sl_str) ? (B_pp[PP_SLGEN_START+3] - B_sl_str) : 8'h00) : B_pp[PP_SLGEN_START+3];
607-
608610
// Cycle 5
609-
R_pp[PP_SLGEN_END] <= (draw_sl_pp[PP_SLGEN_START+4] & sl_method) ? R_sl_mult : R_pp[PP_SLGEN_START+4];
610-
G_pp[PP_SLGEN_END] <= (draw_sl_pp[PP_SLGEN_START+4] & sl_method) ? G_sl_mult : G_pp[PP_SLGEN_START+4];
611-
B_pp[PP_SLGEN_END] <= (draw_sl_pp[PP_SLGEN_START+4] & sl_method) ? B_sl_mult : B_pp[PP_SLGEN_START+4];
611+
// store subtraction based scanlined RGB into pipeline registers
612+
R_pp[PP_SLGEN_START+5] <= draw_sl_pp[PP_SLGEN_START+4] ? ((R_pp[PP_SLGEN_START+4] > R_sl_str) ? (R_pp[PP_SLGEN_START+4] - R_sl_str) : 8'h00) : R_pp[PP_SLGEN_START+4];
613+
G_pp[PP_SLGEN_START+5] <= draw_sl_pp[PP_SLGEN_START+4] ? ((G_pp[PP_SLGEN_START+4] > G_sl_str) ? (G_pp[PP_SLGEN_START+4] - G_sl_str) : 8'h00) : G_pp[PP_SLGEN_START+4];
614+
B_pp[PP_SLGEN_START+5] <= draw_sl_pp[PP_SLGEN_START+4] ? ((B_pp[PP_SLGEN_START+4] > B_sl_str) ? (B_pp[PP_SLGEN_START+4] - B_sl_str) : 8'h00) : B_pp[PP_SLGEN_START+4];
615+
616+
// Cycle 6
617+
R_pp[PP_SLGEN_END] <= (draw_sl_pp[PP_SLGEN_START+5] & sl_method) ? R_sl_mult : R_pp[PP_SLGEN_START+5];
618+
G_pp[PP_SLGEN_END] <= (draw_sl_pp[PP_SLGEN_START+5] & sl_method) ? G_sl_mult : G_pp[PP_SLGEN_START+5];
619+
B_pp[PP_SLGEN_END] <= (draw_sl_pp[PP_SLGEN_START+5] & sl_method) ? B_sl_mult : B_pp[PP_SLGEN_START+5];
612620

613621
/* ---------- Testpattern / mask generation ---------- */
614622
R_pp[PP_TP_END] <= testpattern_enable ? (xpos_pp[PP_TP_START] ^ ypos_pp[PP_TP_START]) : (mask_enable_pp[PP_TP_START] ? MASK_R : R_pp[PP_TP_START]);

sys.qsys

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2902,4 +2902,12 @@
29022902
<interconnectRequirement for="$system" name="qsys_mm.clockCrossingAdapter" value="HANDSHAKE" />
29032903
<interconnectRequirement for="$system" name="qsys_mm.insertDefaultSlave" value="false" />
29042904
<interconnectRequirement for="$system" name="qsys_mm.maxAdditionalLatency" value="1" />
2905+
<interconnectRequirement
2906+
for="mm_interconnect_2|mm_clock_crossing_bridge_2_m0_limiter.cmd_src/cmd_demux.sink"
2907+
name="qsys_mm.postTransform.pipelineCount"
2908+
value="1" />
2909+
<interconnectRequirement
2910+
for="mm_interconnect_2|rsp_mux.src/mm_clock_crossing_bridge_2_m0_limiter.rsp_sink"
2911+
name="qsys_mm.postTransform.pipelineCount"
2912+
value="1" />
29052913
</system>

0 commit comments

Comments
 (0)