Skip to content

Commit 2e79b95

Browse files
author
Andrea Belano
committed
Fix errors with stalls w/ quantized weights
1 parent a72eeca commit 2e79b95

File tree

3 files changed

+50
-40
lines changed

3 files changed

+50
-40
lines changed

rtl/redmule_scheduler.sv

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ module redmule_scheduler
3737

3838
input logic wq_valid_i ,
3939
input logic zeros_valid_i ,
40+
input logic next_wrow_valid_i ,
4041

4142
input logic engine_flush_i ,
4243

@@ -159,7 +160,7 @@ module redmule_scheduler
159160
end
160161
end
161162

162-
assign x_done_en = x_rows_iter_en && x_rows_iter_q == reg_file_i.hwpe_params[X_ITERS][31:16]-1;
163+
assign x_done_en = flgs_streamer_i.x_stream_source_flags.ready_start && x_rows_iter_q == reg_file_i.hwpe_params[X_ITERS][31:16]-1 && x_w_iters_q == reg_file_i.hwpe_params[W_ITERS][15:0]-1 && x_cols_iter_q == reg_file_i.hwpe_params[X_ITERS][15:0]-1;
163164

164165
assign cntrl_x_buffer_o.height = x_cols_iter_q == reg_file_i.hwpe_params[X_ITERS][15:0]-1 && reg_file_i.hwpe_params[LEFTOVERS][23:16] != '0 ? reg_file_i.hwpe_params[LEFTOVERS][23:16] : D;
165166
assign cntrl_x_buffer_o.slots = x_cols_iter_q == reg_file_i.hwpe_params[X_ITERS][15:0]-1 && reg_file_i.hwpe_params[LEFTOVERS][23:16] != '0 ? reg_file_i.hwpe_params[X_SLOTS] : D;
@@ -170,6 +171,7 @@ module redmule_scheduler
170171
******************************/
171172
logic [$clog2(H-1)-1:0] x_shift_cnt_d, x_shift_cnt_q;
172173
logic x_shift_cnt_en;
174+
logic [$clog2(H-1)-1:0] x_shift_offs_q;
173175

174176
always_ff @(posedge clk_i or negedge rst_ni) begin : x_shift_counter
175177
if(~rst_ni) begin
@@ -182,6 +184,14 @@ module redmule_scheduler
182184
end
183185
end
184186

187+
always_ff @(posedge clk_i or negedge rst_ni) begin : x_shift_offset
188+
if(~rst_ni) begin
189+
x_shift_offs_q <= '0;
190+
end else if (flgs_x_buffer_i.full && flgs_x_buffer_i.empty) begin
191+
x_shift_offs_q <= x_shift_cnt_q + x_shift_offs_q;
192+
end
193+
end
194+
185195
assign x_shift_cnt_en = (current_state == LOAD_W) && ~stall_engine;
186196
assign x_shift_cnt_d = x_shift_cnt_q == H-1 ? '0 : x_shift_cnt_q + 1;
187197

@@ -616,15 +626,16 @@ module redmule_scheduler
616626

617627
// Check if the x buffer is full
618628
// Only enable this check when a new set of x columns is to be loaded
629+
// This check is performed one cycle earlier (i.e. during the WAIT state) as the X buffer takes one cycle after the full signal is asserted to actually update the outputs
619630
assign check_x_full = flgs_x_buffer_i.full;
620-
assign check_x_full_en = x_refill && x_shift_cnt_q == H-1 && ~x_done;
631+
assign check_x_full_en = x_refill && x_shift_cnt_q == (H-1 - x_shift_offs_q) && ~x_done;
621632

622633
// Check if the new Y rows are loaded and ready to be pushed
623634
// Only enable this check when the results of an iteration are available
624635
assign check_y_loaded = flgs_z_buffer_i.loaded;
625636
assign check_y_loaded_en = z_wait_counter_q == PIPE_REGS && ~w_done;
626637

627-
assign check_quant_valid = (zeros_valid_i || flgs_w_buffer_i.gid_repeated) && wq_valid_i;
638+
assign check_quant_valid = (zeros_valid_i || flgs_w_buffer_i.gid_repeated) && wq_valid_i && (next_wrow_valid_i || current_state != LOAD_W || x_done_en);
628639
assign check_quant_valid_en = ~w_done && reg_file_i.hwpe_params[DEQUANT_MODE][0];
629640

630641
/******************************
@@ -635,17 +646,17 @@ module redmule_scheduler
635646
~check_x_full && check_x_full_en
636647
) : current_state == LOAD_W && (
637648
~check_w_valid && check_w_valid_en ||
638-
~check_x_full && check_x_full_en ||
639649
~check_y_loaded && check_y_loaded_en ||
640650
~check_quant_valid && check_quant_valid_en
641-
) || z_wait_counter_q == PIPE_REGS && flgs_z_buffer_i.z_priority;
651+
) || z_wait_counter_q == PIPE_REGS && flgs_z_buffer_i.z_priority
652+
|| current_state == WAIT && ~check_x_full && check_x_full_en;
642653
`else
643654
assign stall_engine = current_state == LOAD_W && (
644655
~check_w_valid && check_w_valid_en ||
645-
~check_x_full && check_x_full_en ||
646656
~check_y_loaded && check_y_loaded_en ||
647657
~check_quant_valid && check_quant_valid_en
648-
) || z_wait_counter_q == PIPE_REGS && flgs_z_buffer_i.z_priority;
658+
) || z_wait_counter_q == PIPE_REGS && flgs_z_buffer_i.z_priority
659+
|| current_state == WAIT && ~check_x_full && check_x_full_en;
649660
`endif
650661

651662
always_ff @(posedge clk_i or negedge rst_ni) begin : first_load_register
@@ -684,10 +695,12 @@ module redmule_scheduler
684695
if(~rst_ni) begin
685696
x_refill <= '0;
686697
end else begin
687-
if (clear_i || cntrl_scheduler_i.rst || cntrl_x_buffer_o.rst_w_index) begin
698+
if (clear_i || cntrl_scheduler_i.rst) begin
688699
x_refill <= '0;
689700
end else if (flgs_x_buffer_i.empty) begin
690701
x_refill <= '1;
702+
end else if (cntrl_x_buffer_o.rst_w_index) begin
703+
x_refill <= '0;
691704
end
692705
end
693706
end

rtl/redmule_top.sv

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -716,31 +716,32 @@ redmule_scheduler #(
716716
.NumPipeRegs ( NumPipeRegs ),
717717
.GID_WIDTH ( GidxWidth )
718718
) i_scheduler (
719-
.clk_i ( clk_i ),
720-
.rst_ni ( rst_ni ),
721-
.test_mode_i ( test_mode_i ),
722-
.clear_i ( clear ),
723-
.x_valid_i ( x_buffer_fifo.valid ),
724-
.w_valid_i ( w_buffer_fifo.valid ),
725-
.y_valid_i ( y_buffer_fifo.valid ),
726-
.z_ready_i ( z_buffer_q.ready ),
727-
.wq_valid_i ( wq_buffer_fifo.valid ),
728-
.zeros_valid_i ( zeros_buffer_fifo.valid ),
729-
.engine_flush_i ( engine_flush ),
730-
.reg_file_i ( reg_file ),
731-
.flgs_streamer_i ( flgs_streamer ),
732-
.flgs_x_buffer_i ( x_buffer_flgs ),
733-
.flgs_w_buffer_i ( w_buffer_flgs ),
734-
.flgs_z_buffer_i ( z_buffer_flgs ),
735-
.flgs_engine_i ( flgs_engine ),
736-
.cntrl_scheduler_i ( cntrl_scheduler ),
737-
.reg_enable_o ( reg_enable ),
738-
.cntrl_engine_o ( cntrl_engine ),
739-
.cntrl_x_buffer_o ( x_buffer_ctrl ),
740-
.cntrl_w_buffer_o ( w_buffer_ctrl ),
741-
.cntrl_z_buffer_o ( z_buffer_ctrl ),
742-
.cntrl_gidx_buffer_o ( gidx_buffer_ctrl ),
743-
.flgs_scheduler_o ( flgs_scheduler )
719+
.clk_i ( clk_i ),
720+
.rst_ni ( rst_ni ),
721+
.test_mode_i ( test_mode_i ),
722+
.clear_i ( clear ),
723+
.x_valid_i ( x_buffer_fifo.valid ),
724+
.w_valid_i ( w_buffer_fifo.valid ),
725+
.y_valid_i ( y_buffer_fifo.valid ),
726+
.z_ready_i ( z_buffer_q.ready ),
727+
.wq_valid_i ( wq_buffer_fifo.valid ),
728+
.zeros_valid_i ( zeros_buffer_fifo.valid ),
729+
.next_wrow_valid_i ( x_buffer_next_row_q.valid ),
730+
.engine_flush_i ( engine_flush ),
731+
.reg_file_i ( reg_file ),
732+
.flgs_streamer_i ( flgs_streamer ),
733+
.flgs_x_buffer_i ( x_buffer_flgs ),
734+
.flgs_w_buffer_i ( w_buffer_flgs ),
735+
.flgs_z_buffer_i ( z_buffer_flgs ),
736+
.flgs_engine_i ( flgs_engine ),
737+
.cntrl_scheduler_i ( cntrl_scheduler ),
738+
.reg_enable_o ( reg_enable ),
739+
.cntrl_engine_o ( cntrl_engine ),
740+
.cntrl_x_buffer_o ( x_buffer_ctrl ),
741+
.cntrl_w_buffer_o ( w_buffer_ctrl ),
742+
.cntrl_z_buffer_o ( z_buffer_ctrl ),
743+
.cntrl_gidx_buffer_o ( gidx_buffer_ctrl ),
744+
.flgs_scheduler_o ( flgs_scheduler )
744745
);
745746

746747
endmodule : redmule_top

rtl/x_buffer/redmule_x_buffer.sv

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ localparam int unsigned TOT_DEPTH = H*D
2828
output x_buffer_flgs_t flags_o ,
2929
output logic [W-1:0][H-1:0][BITW-1:0] x_buffer_o ,
3030
input logic [DW-1:0] x_buffer_i ,
31-
input logic [$clog2(D*H)-1:0] next_wrow_i , //Tentative name
31+
input logic [$clog2(D*H)-1:0] next_wrow_i ,
3232
input logic next_wrow_valid_i,
3333
output logic next_wrow_ready_o
3434
);
@@ -159,9 +159,9 @@ always_comb begin : fsm
159159

160160
FAST_FILL: begin
161161
// As buf_write_cnt increments one cycle late, we have to check if its value is set to increase in the next cycle
162-
if ((pad_r_addr_q == buf_write_cnt-1 || flags_o.empty) && (~ctrl_i.h_shift || first_block || (pad_read_cnt == ctrl_i.slots))) begin
162+
if ((pad_r_addr_q == buf_write_cnt-1 || flags_o.empty) && (~ctrl_i.h_shift || first_block || (pad_read_cnt == ctrl_i.slots)) && (~ctrl_i.dequant || next_wrow_valid_i || ctrl_i.last_x)) begin
163163
if (pad_read_cnt == ctrl_i.slots) begin
164-
if (~flags_o.full || ctrl_i.rst_w_index) begin
164+
if (~flags_o.full/* || ctrl_i.rst_w_index*/) begin
165165
next_state = PAD_EMPTY;
166166
end else begin
167167
next_state = WAIT_FIRST_READ;
@@ -180,11 +180,7 @@ always_comb begin : fsm
180180

181181
WAIT_FIRST_READ: begin
182182
if (h_index_r == H-1 && ctrl_i.h_shift) begin
183-
if (ctrl_i.rst_w_index) begin
184-
next_state = PAD_EMPTY;
185-
end else begin
186183
next_state = FILL;
187-
end
188184
end
189185
end
190186

0 commit comments

Comments
 (0)