Skip to content

Commit 377992e

Browse files
committed
[pace-fix] refactor ping pong input and output buffer using existing IPs.
The interface is verified with stallability from the memory side. The pace.py test stimuli generation is also updated with an exp function. The hwpe-stream IP now points to a bug fix in the fence module.
1 parent a72eeca commit 377992e

File tree

8 files changed

+205
-85
lines changed

8 files changed

+205
-85
lines changed

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
**venv
2+
**.bender
3+
target/sim/vsim/work/
4+
sw/build/
5+
golden-model/**/txt/
6+
target/sim/vsim/vsim.wlf
7+
sw/inc/*
8+
.bender/** */
9+
target/sim/vsim/transcript**

Bender.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ dependencies:
1515
cv32e40p : { git: "https://github.com/pulp-platform/cv32e40p.git" , rev: "astral-v1.0" }
1616
cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.0" }
1717
ibex : { git: "https://github.com/pulp-platform/ibex.git" , rev: pulpissimo-v6.1.2 }
18-
hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , rev: db62a6411a7f3dc2b2a74e202377da118a4a6673 } #branch: ab/strb_fix
18+
hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , rev: 7eb50a7cb37dc2a970e0cfee10aff5d961e41340 } #branch: ab/strb_fix
1919
hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: 0e95510c0f4d43452d21b7723d766ae92e45c101 } # branch: yt/task-interfaces
2020
hci : { git: "https://github.com/pulp-platform/hci.git" , rev: fa625bdb824209bc2c0faaa6d99ec15ff981473f } # branch: ab/fifo_options
2121
fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" }

golden-model/FP16/scripts/pace.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ def float16_to_hex(f16_val):
1515
def silu(x):
1616
return x / (1 + np.exp(-x))
1717

18+
def exp(x):
19+
return np.exp(x)
20+
1821
# === BST Partitioning ===
1922

2023
def build_bst_indices(n_partitions):
@@ -61,7 +64,7 @@ def piecewise_poly_approx_bst_fp16(
6164

6265
coeffs = []
6366
for i in range(partitions):
64-
x = np.linspace(raw_bps[i], raw_bps[i + 1], 50).astype(np.float16)
67+
x = np.linspace(raw_bps[i], raw_bps[i + 1], 500).astype(np.float16)
6568
y = func(x.astype(np.float32)).astype(np.float16)
6669
p = np.polynomial.Polynomial.fit(x.astype(np.float32), y.astype(np.float32), deg=degree,
6770
domain=[float(raw_bps[i]), float(raw_bps[i + 1])])
@@ -73,6 +76,7 @@ def piecewise_poly_approx_bst_fp16(
7376
y_approx = np.zeros_like(x_vals)
7477

7578
debug_lines = []
79+
custom_debug_lines = []
7680

7781
# breakpoint layout
7882
debug_lines.append("=== Raw Breakpoints (sorted) ===")
@@ -115,23 +119,27 @@ def piecewise_poly_approx_bst_fp16(
115119
dbg.append(f" y_approx = {y_approx[idx]:.5f} ({float16_to_hex(y_approx[idx])})")
116120
dbg.append(f" error = {float(y_true[idx] - y_approx[idx]):.5f}")
117121
debug_lines.append("\n".join(dbg) + "\n")
122+
if idx % 16 == 0:
123+
custom_debug_lines.append("\n".join(dbg))
118124

119125
return {
120126
"x_vals": x_vals,
121127
"y_true": y_true,
122128
"y_approx": y_approx,
123129
"breakpoints_bst": breakpoints_bst,
124130
"coeffs": coeffs,
125-
"debug_lines": debug_lines
131+
"debug_lines": debug_lines,
132+
"custom_debug_lines": custom_debug_lines
126133
}
127134

128135

129136
def write_debug_output(results, debug_file="execution.txt"):
130137
with open(debug_file, "w") as f:
131-
for line in results["debug_lines"]:
138+
for line in results:
132139
f.write(line + "\n")
133140
print(f"✅ Debug written to: {debug_file}")
134141

142+
135143
def write_coefficients_output(results, coeff_file="coefficients.txt"):
136144
with open(coeff_file, "w") as f:
137145
for i, coeffs in enumerate(results["coeffs"]):
@@ -245,8 +253,8 @@ def write_tensor_dim_inc_file(stimuli_file = "tensor_dim.h", n_tests=1000):
245253
f_d.write('#ifndef __TENSOR_DIM__\n' )
246254
f_d.write('#define __TENSOR_DIM__\n\n' )
247255
f_d.write('#define M_SIZE 8 \n' )
248-
f_d.write('#define N_SIZE 32\n' )
249-
f_d.write(f'#define K_SIZE {n_tests/8} \n')
256+
f_d.write('#define N_SIZE 64\n' )
257+
f_d.write(f'#define K_SIZE {n_tests/32} \n')
250258
f_d.write('#define SRC_FMT FP16\n' )
251259
f_d.write('#define DST_FMT FP16\n' )
252260
f_d.write('#define FPFORMAT 16\n' )
@@ -258,27 +266,28 @@ def write_tensor_dim_inc_file(stimuli_file = "tensor_dim.h", n_tests=1000):
258266
if __name__ == "__main__":
259267
import argparse
260268
parser = argparse.ArgumentParser("PACE Operation Test")
261-
parser.add_argument( '--x_min', type=int, default=-6 )
262-
parser.add_argument( '--x_max', type=int, default=6 )
269+
parser.add_argument( '--x_min', type=int, default=-11 )
270+
parser.add_argument( '--x_max', type=int, default=0 )
263271
parser.add_argument( '--f_name', type=str, default="silu" )
264272
parser.add_argument( '--n_parts', type=int, default=8 )
265273
parser.add_argument( '--n_deg', type=int, default=4 )
266-
parser.add_argument( '--n_tests', type=int, default=1024 )
274+
parser.add_argument( '--n_tests', type=int, default=4096 )
267275
parser.add_argument( '--file_name', type=str, default='net_parameters.h')
268276
parser.add_argument( '--inc_dir', type=str)
269277
parser.add_argument( '--txt_dir', type=str)
270278
args = parser.parse_args()
271279
results = piecewise_poly_approx_bst_fp16(
272-
silu, xmin=-6, xmax=6, degree=4, partitions=8, n_stimuli=args.n_tests
280+
exp, xmin=args.x_min, xmax=args.x_max, degree=4, partitions=8, n_stimuli=args.n_tests
273281
)
274-
write_debug_output(debug_file=os.path.join(args.txt_dir,"execution.txt"),results=results)
282+
write_debug_output(debug_file=os.path.join(args.txt_dir,"execution.txt"),results=results["debug_lines"])
283+
write_debug_output(debug_file=os.path.join(args.txt_dir,"execution_custom.txt"),results=results["custom_debug_lines"])
275284
write_coefficients_output(coeff_file=os.path.join(args.txt_dir,"coefficients.txt"), results=results)
276285
write_inp_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "w_input.h"))
277286
write_golden_oup_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "golden.h"))
278287
write_actual_oup_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "z_output.h"))
279288
write_golden_inc_debug_file(results, stimuli_file=os.path.join(args.txt_dir, "golden_debug.h"))
280289
write_y_inp_inc_file(stimuli_file=os.path.join(args.inc_dir, "y_input.h"))
281-
write_x_file(coeffs=results["coeffs"], xmin=-6, xmax=6, partitions=8, stimuli_file=os.path.join(args.inc_dir, "x_input.h"))
290+
write_x_file(coeffs=results["coeffs"], xmin=args.x_min, xmax=args.x_max, partitions=8, stimuli_file=os.path.join(args.inc_dir, "x_input.h"))
282291
write_tensor_dim_inc_file(stimuli_file=os.path.join(args.inc_dir, "tensor_dim.h"), n_tests=args.n_tests)
283292

284293

rtl/pace/pace_pingpong_inp.sv

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,40 @@
66
//
77
// This module takes a 256b data word and splits it into two 128b halves, which are fed to the engine over 2 cycles.
88
module pace_pingpong_inp #(
9-
parameter int unsigned InpDataWidth = 256,
10-
parameter int unsigned NumRows = 8,
11-
parameter int unsigned OupDataWidth = 16
9+
parameter int unsigned InpDataWidth = 256,
10+
parameter int unsigned NumRows = 8,
11+
parameter int unsigned CEOupDataWidth = 16,
12+
localparam int unsigned OupDataWidth = NumRows * CEOupDataWidth
1213
) (
1314
input logic clk_i,
1415
input logic rst_ni,
1516
input logic clear_i,
1617
input logic enable_i,
17-
output logic [NumRows-1:0][OupDataWidth-1:0] output_o,
18+
output logic [NumRows-1:0][CEOupDataWidth-1:0] output_o,
1819
output logic valid_o,
1920
input logic ready_i,
2021
hwpe_stream_intf_stream.sink input_i
2122
);
2223

2324
// Local signals
2425
hwpe_stream_intf_stream #(
25-
.DATA_WIDTH ( InpDataWidth*NumRows )
26+
.DATA_WIDTH (InpDataWidth/2 )
2627
) ping_pong_buffer [1:0] (
2728
.clk ( clk_i )
2829
);
29-
logic output_handshake;
30-
logic [NumRows*OupDataWidth-1:0] output_buffer;
31-
logic ping_pong_status_d, ping_pong_status_q;
30+
31+
hwpe_stream_intf_stream #(
32+
.DATA_WIDTH ( InpDataWidth/2 )
33+
) output_buffer (
34+
.clk ( clk_i )
35+
);
36+
37+
hwpe_stream_intf_stream #(
38+
.DATA_WIDTH ( InpDataWidth/2 )
39+
) output_buffer_fifo (
40+
.clk ( clk_i )
41+
);
42+
3243

3344
// Stream splitter
3445
hwpe_stream_split #(
@@ -42,33 +53,49 @@ module pace_pingpong_inp #(
4253
.pop_o ( ping_pong_buffer )
4354
);
4455

45-
// Ready/valid handshake
46-
assign ping_pong_buffer[0].ready = output_handshake & ping_pong_status_q & enable_i;
47-
assign ping_pong_buffer[1].ready = output_handshake & ping_pong_status_q & enable_i;
56+
hwpe_stream_package::ctrl_serdes_t ctrl_serdes;
4857

49-
assign output_buffer = ping_pong_status_q ? ping_pong_buffer[1].data : ping_pong_buffer[0].data;
50-
assign valid_o = ping_pong_status_q ? ping_pong_buffer[1].valid : ping_pong_buffer[0].valid;
58+
assign ctrl_serdes.clear_serdes_state = clear_i;
59+
assign ctrl_serdes.nb_contig_m1 = 0;
60+
assign ctrl_serdes.first_stream = 1'b0;
61+
62+
hwpe_stream_serialize #(
63+
.NB_IN_STREAMS ( 2 ),
64+
.CONTIG_LIMIT ( 1024 ),
65+
.DATA_WIDTH ( OupDataWidth ),
66+
.SYNC_READY ( 1'b1 )
67+
) i_hwpe_stream_serialize (
68+
.clk_i ( clk_i ),
69+
.rst_ni ( rst_ni ),
70+
.clear_i ( clear_i ),
71+
.ctrl_i ( ctrl_serdes ),
72+
.push_i ( ping_pong_buffer ),
73+
.pop_o ( output_buffer )
74+
);
75+
76+
hwpe_stream_fifo #(
77+
.DATA_WIDTH ( OupDataWidth ),
78+
.FIFO_DEPTH ( 2 ),
79+
.LATCH_FIFO ( 0 ),
80+
.LATCH_FIFO_TEST_WRAP ( 0 )
81+
) i_hwpe_stream_fifo (
82+
.clk_i ( clk_i ),
83+
.rst_ni ( rst_ni ),
84+
.clear_i ( clear_i ),
85+
.flags_o ( ),
86+
.push_i ( output_buffer ),
87+
.pop_o ( output_buffer_fifo )
88+
);
5189

5290
// Output slicing
5391
generate
5492
for (genvar r = 0; r < NumRows; r++) begin : gen_output_unpack
55-
assign output_o[r] = output_buffer[(OupDataWidth*(r+1))-1 -: OupDataWidth];
93+
assign output_o[r] = output_buffer_fifo.data[(CEOupDataWidth*(r+1))-1 -: CEOupDataWidth];
5694
end
5795
endgenerate
96+
assign valid_o = output_buffer_fifo.valid;
97+
assign output_buffer_fifo.ready = ready_i & enable_i;
5898

59-
// Handshake logic
60-
assign output_handshake = valid_o & ready_i;
6199

62-
// Ping-pong control
63-
assign ping_pong_status_d = clear_i ? 1'b0 :
64-
output_handshake ? ~ping_pong_status_q :
65-
ping_pong_status_q;
66-
always_ff @(posedge clk_i or negedge rst_ni) begin : gen_ping_pong_status_ff
67-
if (~rst_ni) begin
68-
ping_pong_status_q <= 1'b0;
69-
end else begin
70-
ping_pong_status_q <= ping_pong_status_d;
71-
end
72-
end
73100

74101
endmodule

0 commit comments

Comments
 (0)