Skip to content

Commit f72d9d8

Browse files
committed
Merge remote-tracking branch 'xilinx/dev' into fix/finn_docker_tag
2 parents ae9e25c + 88e207e commit f72d9d8

40 files changed

+1282
-371
lines changed

docs/finn/faq.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ Which data layout do FINN-generated accelerators use? Big-endian? Little-endian?
8181
If you need to do this manually, first examine how the `FINN PYNQ Python drivers <https://github.com/Xilinx/finn-examples/blob/main/finn_examples/driver.py#L379>`_ do this – notice how the input data is
8282
first reshaped to create the “folded input shape” that reflects the word size of the first layer based on how much it
8383
was parallelized, then data packing is applied to obtain a raw byte array (with some reversals going on) that can be
84-
fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input <https://github.com/Xilinx/finn-base/blob/dev/src/finn/util/data_packing.py#L289>`_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation.
84+
fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input <https://github.com/Xilinx/finn/blob/dev/src/finn/util/data_packing.py#L284>`_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation.
8585

8686
Why does FIFO sizing take so long for my network? Is something wrong?
8787
The automatic FIFO sizing in FINN can take quite a long time. It unfortunately doesn’t really parallelize on multiple cores since

fetch-repos.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2828
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929

30-
QONNX_COMMIT="fd61cfeebbdaba351abf7e9d54cd785d7776fa4f"
30+
QONNX_COMMIT="2281a777d84aa5cbd7469085c2e534fb4a03ccf9"
3131
FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851"
32-
BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db"
32+
BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4"
3333
PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1"
3434
CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
3535
HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3"

finn-rtllib/fifo/hdl/Q_srl.v

Lines changed: 72 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -184,58 +184,58 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount);
184184
end // always @ (posedge clock or negedge reset)
185185

186186
always @* begin // - combi always
187-
srlo_ <= 'bx;
188-
shift_en_o_ <= 1'bx;
189-
shift_en_ <= 1'bx;
190-
addr_ <= 'bx;
191-
state_ <= 2'bx;
187+
srlo_ = 'bx;
188+
shift_en_o_ = 1'bx;
189+
shift_en_ = 1'bx;
190+
addr_ = 'bx;
191+
state_ = 2'bx;
192192
case (state)
193193

194194
state_empty: begin // - (empty, will not produce)
195195
if (i_v) begin // - empty & i_v => consume
196-
srlo_ <= i_d;
197-
shift_en_o_ <= 1;
198-
shift_en_ <= 1'bx;
199-
addr_ <= 0;
200-
state_ <= state_one;
196+
srlo_ = i_d;
197+
shift_en_o_ = 1;
198+
shift_en_ = 1'bx;
199+
addr_ = 0;
200+
state_ = state_one;
201201
end
202202
else begin // - empty & !i_v => idle
203-
srlo_ <= 'bx;
204-
shift_en_o_ <= 0;
205-
shift_en_ <= 1'bx;
206-
addr_ <= 0;
207-
state_ <= state_empty;
203+
srlo_ = 'bx;
204+
shift_en_o_ = 0;
205+
shift_en_ = 1'bx;
206+
addr_ = 0;
207+
state_ = state_empty;
208208
end
209209
end
210210

211211
state_one: begin // - (contains one)
212212
if (i_v && o_b) begin // - one & i_v & o_b => consume
213-
srlo_ <= 'bx;
214-
shift_en_o_ <= 0;
215-
shift_en_ <= 1;
216-
addr_ <= 0;
217-
state_ <= state_more;
213+
srlo_ = 'bx;
214+
shift_en_o_ = 0;
215+
shift_en_ = 1;
216+
addr_ = 0;
217+
state_ = state_more;
218218
end
219219
else if (i_v && !o_b) begin // - one & i_v & !o_b => cons+prod
220-
srlo_ <= i_d;
221-
shift_en_o_ <= 1;
222-
shift_en_ <= 1;
223-
addr_ <= 0;
224-
state_ <= state_one;
220+
srlo_ = i_d;
221+
shift_en_o_ = 1;
222+
shift_en_ = 1;
223+
addr_ = 0;
224+
state_ = state_one;
225225
end
226226
else if (!i_v && o_b) begin // - one & !i_v & o_b => idle
227-
srlo_ <= 'bx;
228-
shift_en_o_ <= 0;
229-
shift_en_ <= 1'bx;
230-
addr_ <= 0;
231-
state_ <= state_one;
227+
srlo_ = 'bx;
228+
shift_en_o_ = 0;
229+
shift_en_ = 1'bx;
230+
addr_ = 0;
231+
state_ = state_one;
232232
end
233233
else if (!i_v && !o_b) begin // - one & !i_v & !o_b => produce
234-
srlo_ <= 'bx;
235-
shift_en_o_ <= 0;
236-
shift_en_ <= 1'bx;
237-
addr_ <= 0;
238-
state_ <= state_empty;
234+
srlo_ = 'bx;
235+
shift_en_o_ = 0;
236+
shift_en_ = 1'bx;
237+
addr_ = 0;
238+
state_ = state_empty;
239239
end
240240
end // case: state_one
241241

@@ -244,60 +244,60 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount);
244244
// - (full, will not consume)
245245
// - (full here if depth==2)
246246
if (o_b) begin // - full & o_b => idle
247-
srlo_ <= 'bx;
248-
shift_en_o_ <= 0;
249-
shift_en_ <= 0;
250-
addr_ <= addr;
251-
state_ <= state_more;
247+
srlo_ = 'bx;
248+
shift_en_o_ = 0;
249+
shift_en_ = 0;
250+
addr_ = addr;
251+
state_ = state_more;
252252
end
253253
else begin // - full & !o_b => produce
254-
srlo_ <= srl[addr];
255-
shift_en_o_ <= 1;
256-
shift_en_ <= 0;
257-
// addr_ <= addr-1;
258-
// state_ <= state_more;
259-
addr_ <= addr_zero_ ? 0 : addr-1;
260-
state_ <= addr_zero_ ? state_one : state_more;
254+
srlo_ = srl[addr];
255+
shift_en_o_ = 1;
256+
shift_en_ = 0;
257+
// addr_ = addr-1;
258+
// state_ = state_more;
259+
addr_ = addr_zero_ ? 0 : addr-1;
260+
state_ = addr_zero_ ? state_one : state_more;
261261
end
262262
end
263263
else begin // - (mid: neither empty nor full)
264264
if (i_v && o_b) begin // - mid & i_v & o_b => consume
265-
srlo_ <= 'bx;
266-
shift_en_o_ <= 0;
267-
shift_en_ <= 1;
268-
addr_ <= addr+1;
269-
state_ <= state_more;
265+
srlo_ = 'bx;
266+
shift_en_o_ = 0;
267+
shift_en_ = 1;
268+
addr_ = addr+1;
269+
state_ = state_more;
270270
end
271271
else if (i_v && !o_b) begin // - mid & i_v & !o_b => cons+prod
272-
srlo_ <= srl[addr];
273-
shift_en_o_ <= 1;
274-
shift_en_ <= 1;
275-
addr_ <= addr;
276-
state_ <= state_more;
272+
srlo_ = srl[addr];
273+
shift_en_o_ = 1;
274+
shift_en_ = 1;
275+
addr_ = addr;
276+
state_ = state_more;
277277
end
278278
else if (!i_v && o_b) begin // - mid & !i_v & o_b => idle
279-
srlo_ <= 'bx;
280-
shift_en_o_ <= 0;
281-
shift_en_ <= 0;
282-
addr_ <= addr;
283-
state_ <= state_more;
279+
srlo_ = 'bx;
280+
shift_en_o_ = 0;
281+
shift_en_ = 0;
282+
addr_ = addr;
283+
state_ = state_more;
284284
end
285285
else if (!i_v && !o_b) begin // - mid & !i_v & !o_b => produce
286-
srlo_ <= srl[addr];
287-
shift_en_o_ <= 1;
288-
shift_en_ <= 0;
289-
addr_ <= addr_zero_ ? 0 : addr-1;
290-
state_ <= addr_zero_ ? state_one : state_more;
286+
srlo_ = srl[addr];
287+
shift_en_o_ = 1;
288+
shift_en_ = 0;
289+
addr_ = addr_zero_ ? 0 : addr-1;
290+
state_ = addr_zero_ ? state_one : state_more;
291291
end
292292
end // else: !if(addr_full)
293293
end // case: state_more
294294

295295
default: begin
296-
srlo_ <= 'bx;
297-
shift_en_o_ <= 1'bx;
298-
shift_en_ <= 1'bx;
299-
addr_ <= 'bx;
300-
state_ <= 2'bx;
296+
srlo_ = 'bx;
297+
shift_en_o_ = 1'bx;
298+
shift_en_ = 1'bx;
299+
addr_ = 'bx;
300+
state_ = 2'bx;
301301
end // case: default
302302

303303
endcase // case(state)

finn-rtllib/mvu/mvu_8sx8u_dsp48.sv

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ module mvu_8sx8u_dsp48 #(
7272
return res;
7373
endfunction : init_leave_loads
7474

75+
function int unsigned sum_width(input int unsigned n, input int unsigned w);
76+
return w <= 16? $clog2(1 + n*(2**w - 1)) : w + $clog2(n);
77+
endfunction : sum_width
78+
7579
// Pipeline for last indicator flag
7680
logic [1:5] L = '0;
7781
always_ff @(posedge clk) begin
@@ -445,7 +449,7 @@ module mvu_8sx8u_dsp48 #(
445449
// Stage #4: Cross-SIMD Reduction
446450

447451
// Count leaves reachable from each node
448-
localparam leave_load_t LEAVE_LOAD = SIMD > 1 ? init_leave_loads() : '{ default: 0}; // SIMD=1 requires no adder tree, so zero-ing out, otherwise init_leave_loads ends up in infinite loop
452+
localparam leave_load_t LEAVE_LOAD = SIMD > 1 ? init_leave_loads() : '{ default: 0 }; // SIMD=1 requires no adder tree, so zero out the loads; otherwise init_leave_loads ends up in an infinite loop
449453

450454
// Range of Cross-lane Contribution Tracked in Hi4
451455
/*
@@ -462,7 +466,7 @@ module mvu_8sx8u_dsp48 #(
462466
* signed value is determined by its lower bound to be at least:
463467
* 1 + $clog2(2^(w-1)+SIMD)
464468
*/
465-
localparam int unsigned HI_WIDTH = 1 + $clog2(2**(ACCU_WIDTH-D[1]-1)+SIMD);
469+
localparam int unsigned HI_WIDTH = 1 + ($clog2(SIMD) < ACCU_WIDTH-D[1]? ACCU_WIDTH-D[1] : $clog2(2**(ACCU_WIDTH-D[1]-1)+SIMD));
466470

467471
uwire signed [ACCU_WIDTH -1:0] up4;
468472
uwire signed [HI_WIDTH -1:0] hi4;
@@ -504,12 +508,12 @@ module mvu_8sx8u_dsp48 #(
504508
// Conclusive low part accumulation
505509
if(i >= PE_REM) begin : blkLo
506510
// Adder Tree across all SIMD low contributions (all unsigned arithmetic)
507-
localparam int unsigned ROOT_WIDTH = $clog2(1 + SIMD*(2**LO_WIDTH-1));
511+
localparam int unsigned ROOT_WIDTH = sum_width(SIMD, LO_WIDTH);
508512
uwire [2*SIMD-2:0][ROOT_WIDTH-1:0] tree;
509513
for(genvar s = 0; s < SIMD; s++) assign tree[SIMD-1+s] = p3[s][D[i]+:LO_WIDTH];
510514
for(genvar n = 0; n < SIMD-1; n++) begin
511515
// Sum truncated to actual maximum bit width at this node
512-
localparam int unsigned NODE_WIDTH = $clog2(1 + LEAVE_LOAD[n]*(2**LO_WIDTH-1));
516+
localparam int unsigned NODE_WIDTH = sum_width(LEAVE_LOAD[n], LO_WIDTH);
513517
uwire [NODE_WIDTH-1:0] s = tree[2*n+1] + tree[2*n+2];
514518
assign tree[n] = s;
515519
end

0 commit comments

Comments
 (0)