Skip to content

Commit c4ee4cf

Browse files
Merge pull request #198 from JulianKemmerer/func_latency
Implement FUNC_LATENCY pragma for user defined pipeline building blocks
2 parents 91876f6 + eb01447 commit c4ee4cf

File tree

7 files changed

+275
-152
lines changed

7 files changed

+275
-152
lines changed

examples/user_func_latency.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
2+
3+
#pragma PART "xc7a35ticsg324-1l"
4+
// How to allow user defined fixed pipeline latencies?
5+
// https://github.com/JulianKemmerer/PipelineC/issues/97
6+
#include "uintN_t.h"
7+
#include "compiler.h"
8+
9+
/*
10+
#pragma FUNC_LATENCY small_op_delay 1
11+
uint64_t small_op_delay(uint64_t x){
12+
static uint64_t the_reg;
13+
uint64_t rv = the_reg;
14+
the_reg = ~x;
15+
return rv;
16+
}
17+
typedef struct values_t{
18+
uint64_t user_delayed;
19+
uint64_t tool_delayed;
20+
}values_t;
21+
#pragma MAIN_MHZ main 400.0
22+
values_t main(uint64_t x, uint64_t y){
23+
values_t outputs;
24+
outputs.tool_delayed = ~x + ~y;
25+
outputs.user_delayed = small_op_delay(x) + small_op_delay(y);
26+
return outputs;
27+
}
28+
*/
29+
30+
BUILT_IN_RAM_FUNC_LATENCY(my_func, my_bram_RAM_DP_RF_1, 1)
31+
#pragma MAIN my_func
32+
uint32_t my_func(uint32_t waddr, uint32_t wdata, uint32_t raddr)
33+
{
34+
static uint32_t my_bram[128];
35+
//static uint32_t waddr = 0;
36+
//static uint32_t wdata = 0;
37+
//static uint32_t raddr = 0;
38+
uint32_t rdata = my_bram_RAM_DP_RF_1(raddr, waddr, wdata, 1);
39+
printf("Write: addr=%d,data=%d. Read addr=%d. Read data=%d\n",
40+
waddr, wdata, raddr, rdata);
41+
// Test pattern
42+
//if(wdata > 0){
43+
// raddr += 1;
44+
//}
45+
//waddr += 1;
46+
//wdata += 1;
47+
return rdata; // Dummy
48+
}
49+
50+
/*
51+
// This func is not allowed since it doesn't make sense in terms of implemented hardware
52+
// The static local variables describe clock by clock logic
53+
// but the BUILT_IN_RAM_FUNC_LATENCY=1 of my_bram_RAM_DP_RF_1 describes a pipeline
54+
#pragma MAIN my_bad_func
55+
BUILT_IN_RAM_FUNC_LATENCY(my_bad_func, my_bram_RAM_DP_RF_1, 1)
56+
uint32_t my_bad_func(uint32_t waddr, uint32_t wdata, uint32_t raddr)
57+
{
58+
static uint32_t my_bram[128];
59+
static uint32_t accum_rd_data = 0;
60+
uint32_t rdata = my_bram_RAM_DP_RF_1(raddr, waddr, wdata, 1);
61+
accum_rd_data += rdata;
62+
return rdata + accum_rd_data;
63+
}
64+
*/
65+
/*
66+
#include "intN_t.h"
67+
#pragma FUNC_LATENCY one_cycle_negate 1
68+
int64_t one_cycle_negate(int64_t x){
69+
static int64_t the_reg;
70+
int64_t rv = the_reg;
71+
the_reg = -x;
72+
return rv;
73+
}
74+
#pragma MAIN_MHZ main 200.0
75+
// Output should always be == input after some cycles
76+
// But can be pipelined
77+
int64_t main(int64_t x){
78+
int64_t nx = one_cycle_negate(x);
79+
int64_t z = x + nx;
80+
z = one_cycle_negate(z);
81+
return z + x;
82+
}
83+
*/

pipelinec/include/compiler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ PRAGMA_MESSAGE(MAIN_SYN_MHZ main_func mhz)
8080
#define MAIN_MHZ_GROUP(main_func, mhz, group)\
8181
PRAGMA_MESSAGE(MAIN_MHZ main_func mhz group)
8282

83+
#define BUILT_IN_RAM_FUNC_LATENCY(call_location_func_name, ram_name, latency) \
84+
PRAGMA_MESSAGE(FUNC_LATENCY PPCAT(PPCAT(call_location_func_name,_),ram_name) latency)
85+
8386
// Work around for user top level IO:
8487
// https://github.com/JulianKemmerer/PipelineC/issues/123
8588
// https://github.com/JulianKemmerer/PipelineC/issues/130

src/C_TO_LOGIC.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,13 @@ def MERGE_COMB_LOGIC(self, logic_b):
737737
self.wire_driven_by, logic_b.wire_driven_by
738738
)
739739

740+
# Delay
741+
if self.delay is not None and logic_b.delay is not None:
742+
if self.delay != logic_b.delay:
743+
raise Exception("Mismatch delay!")
744+
elif self.delay is None:
745+
self.delay = logic_b.delay
746+
740747
# Next user inst name?
741748
if self.next_user_inst_name != logic_b.next_user_inst_name:
742749
if self.next_user_inst_name is None:
@@ -1415,6 +1422,8 @@ def CAN_BE_SLICED(self, parser_state):
14151422
return False
14161423
return True
14171424
def BODY_CAN_BE_SLICED(self, parser_state):
1425+
if self.func_name in parser_state.func_fixed_latency:
1426+
return False
14181427
if not self.CAN_BE_SLICED(parser_state):
14191428
return False
14201429
if self.uses_nonvolatile_state_regs:
@@ -9605,7 +9614,7 @@ def DEL_ALL_CACHES():
96059614
global _C_AST_REF_TOKS_TO_C_TYPE_cache
96069615
global _C_AST_NODE_COORD_STR_cache
96079616
global _C_AST_FUNC_DEF_TO_LOGIC_cache
9608-
# global _GET_ZERO_CLK_PIPELINE_MAP_cache
9617+
# global _GET_ZERO_ADDED_CLKS_PIPELINE_MAP_cache
96099618

96109619
_other_partial_logic_cache = {}
96119620
_REF_TOKS_TO_OWN_BRANCH_REF_TOKS_cache = {}
@@ -9617,7 +9626,7 @@ def DEL_ALL_CACHES():
96179626
_C_AST_REF_TOKS_TO_C_TYPE_cache = {}
96189627
_C_AST_NODE_COORD_STR_cache = {}
96199628
_C_AST_FUNC_DEF_TO_LOGIC_cache = {}
9620-
# _GET_ZERO_CLK_PIPELINE_MAP_cache = {}
9629+
# _GET_ZERO_ADDED_CLKS_PIPELINE_MAP_cache = {}
96219630

96229631

96239632
_EXE_ABS_DIR = None
@@ -9675,6 +9684,7 @@ def __init__(self):
96759684
self.main_clk_group = {} # dict[main_inst_name]=clk_group_str
96769685
self.func_mult_style = {}
96779686
self.func_marked_wires = set()
9687+
self.func_fixed_latency = {}
96789688
self.func_marked_blackbox = set()
96799689
self.func_marked_no_add_io_regs = set()
96809690
self.func_marked_debug = set()
@@ -9737,6 +9747,7 @@ def DEEPCOPY(self):
97379747
rv.main_clk_group = dict(self.main_clk_group)
97389748
rv.func_mult_style = dict(self.func_mult_style)
97399749
rv.func_marked_wires = set(self.func_marked_wires)
9750+
rv.func_fixed_latency = dict(self.func_fixed_latency)
97409751
rv.func_marked_blackbox = set(self.func_marked_blackbox)
97419752
rv.func_marked_no_add_io_regs = set(self.func_marked_no_add_io_regs)
97429753
rv.func_marked_debug = set(self.func_marked_debug)
@@ -10198,32 +10209,32 @@ def PARSE_FILE(c_filename):
1019810209
sys.exit(-1)
1019910210

1020010211

10201-
def WRITE_0CLK_FINAL_FILES(parser_state):
10202-
print("Building map of combinatorial logic...", flush=True)
10212+
def WRITE_0_ADDED_CLKS_FINAL_FILES(parser_state):
10213+
print("Building map of logic to be pipelined...", flush=True)
1020310214
SYN.PART_SET_TOOL(
1020410215
parser_state.part, allow_fail=True
1020510216
) # Comb logic only might not have tool set
10206-
ZeroClockTimingParamsLookupTable = SYN.GET_ZERO_CLK_TIMING_PARAMS_LOOKUP(
10217+
ZeroAddedClocksTimingParamsLookupTable = SYN.GET_ZERO_ADDED_CLKS_TIMING_PARAMS_LOOKUP(
1020710218
parser_state
1020810219
)
1020910220
multimain_timing_params = SYN.MultiMainTimingParams()
10210-
multimain_timing_params.TimingParamsLookupTable = ZeroClockTimingParamsLookupTable
10221+
multimain_timing_params.TimingParamsLookupTable = ZeroAddedClocksTimingParamsLookupTable
1021110222
# Write report of floating point module use - hi Victor!
1021210223
WRITE_FLOAT_MODULE_INSTANCES_REPORT(multimain_timing_params, parser_state)
1021310224
# Integers too..
1021410225
WRITE_INTEGER_MODULE_INSTANCES_REPORT(multimain_timing_params, parser_state)
1021510226
print(
10216-
"Writing VHDL files for all functions (as combinatorial logic)...", flush=True
10227+
"Writing VHDL files for all functions (before any added pipelining)...", flush=True
1021710228
)
10218-
SYN.WRITE_ALL_ZERO_CLK_VHDL(parser_state, ZeroClockTimingParamsLookupTable)
10229+
SYN.WRITE_ALL_ZERO_CLK_VHDL(parser_state, ZeroAddedClocksTimingParamsLookupTable)
1021910230
print(
1022010231
"Writing the constant struct+enum definitions as defined from C code...",
1022110232
flush=True,
1022210233
)
1022310234
VHDL.WRITE_C_DEFINED_VHDL_STRUCTS_PACKAGE(parser_state)
1022410235
print("Writing global wire definitions as parsed from C code...", flush=True)
1022510236
VHDL.WRITE_GLOBAL_WIRES_VHDL_PACKAGE(parser_state)
10226-
print("Writing finalized comb. logic synthesis tool files...", flush=True)
10237+
print("Writing output files before adding pipelining...", flush=True)
1022710238
SYN.WRITE_FINAL_FILES(multimain_timing_params, parser_state)
1022810239

1022910240

@@ -11568,6 +11579,13 @@ def APPEND_PRAGMA_INFO(parser_state):
1156811579
main_func = toks[1]
1156911580
parser_state.func_marked_wires.add(main_func)
1157011581

11582+
# FUNC_LATENCY
11583+
elif name == "FUNC_LATENCY":
11584+
toks = pragma.string.split(" ")
11585+
func = toks[1]
11586+
latency = int(toks[2])
11587+
parser_state.func_fixed_latency[func] = latency
11588+
1157111589
# FUNC_BLACKBOX
1157211590
elif name == "FUNC_BLACKBOX":
1157311591
toks = pragma.string.split(" ")

src/SW_LIB.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1855,12 +1855,31 @@ def GET_MEM_NAME(logic):
18551855
return RAM_SP_RF + "_2"
18561856
elif logic.func_name.endswith("_" + RAM_DP_RF + "_0"):
18571857
return RAM_DP_RF + "_0"
1858+
elif logic.func_name.endswith("_" + RAM_DP_RF + "_1"):
1859+
return RAM_DP_RF + "_1"
18581860
elif logic.func_name.endswith("_" + RAM_DP_RF + "_2"):
18591861
return RAM_DP_RF + "_2"
18601862
else:
18611863
print("GET_MEM_NAME for func", logic.func_name, "?")
18621864
sys.exit(-1)
18631865

1866+
def MEM_NAME_TO_LATENCY(mem_name):
1867+
if mem_name == RAM_SP_RF + "_0":
1868+
return 0
1869+
elif mem_name == RAM_SP_RF + "_1":
1870+
return 1
1871+
elif mem_name == RAM_SP_RF + "_2":
1872+
return 2
1873+
elif mem_name == RAM_DP_RF + "_0":
1874+
return 0
1875+
elif mem_name == RAM_DP_RF + "_1":
1876+
return 1
1877+
elif mem_name == RAM_DP_RF + "_2":
1878+
return 2
1879+
else:
1880+
print("MEM_NAME_TO_LATENCY for mem_name", mem_name, "?")
1881+
sys.exit(-1)
1882+
18641883

18651884
def GET_BIT_MATH_H_LOGIC_LOOKUP_FROM_FUNC_NAMES(func_names, parser_state):
18661885
# TODO don't do string search at all - do 'in' list checks?

0 commit comments

Comments
 (0)