@@ -15,6 +15,9 @@ def float16_to_hex(f16_val):
def silu(x):
    """Return SiLU (a.k.a. swish), ``x * sigmoid(x)``, element-wise.

    The textbook form ``x / (1 + exp(-x))`` evaluates ``exp(-x)``, which
    overflows to ``inf`` for strongly negative ``x`` (already around
    ``x < -11.1`` for float16 data), emitting a RuntimeWarning and flushing
    the result to ``-0.0``.  Only ``exp(-|x|)`` is evaluated here; it lies in
    ``(0, 1]`` and therefore cannot overflow.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        NumPy scalar or array with the same shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # x >= 0:  x / (1 + exp(-x))             (same as the textbook form)
    # x <  0:  x * exp(x) / (1 + exp(x))     (multiply through by exp(x))
    numerator = np.where(x >= 0, x, x * decay)
    return numerator / (1 + decay)
1717
def exp(x):
    """Return the natural exponential of ``x``, element-wise.

    Thin wrapper around ``np.exp`` that gives the exponential its own
    module-level name, so it can be handed to the approximation routine as
    the target function.
    """
    result = np.exp(x)
    return result
20+
1821# === BST Partitioning ===
1922
2023def build_bst_indices (n_partitions ):
@@ -61,7 +64,7 @@ def piecewise_poly_approx_bst_fp16(
6164
6265 coeffs = []
6366 for i in range (partitions ):
64- x = np .linspace (raw_bps [i ], raw_bps [i + 1 ], 50 ).astype (np .float16 )
67+ x = np .linspace (raw_bps [i ], raw_bps [i + 1 ], 500 ).astype (np .float16 )
6568 y = func (x .astype (np .float32 )).astype (np .float16 )
6669 p = np .polynomial .Polynomial .fit (x .astype (np .float32 ), y .astype (np .float32 ), deg = degree ,
6770 domain = [float (raw_bps [i ]), float (raw_bps [i + 1 ])])
@@ -73,6 +76,7 @@ def piecewise_poly_approx_bst_fp16(
7376 y_approx = np .zeros_like (x_vals )
7477
7578 debug_lines = []
79+ custom_debug_lines = []
7680
7781 # breakpoint layout
7882 debug_lines .append ("=== Raw Breakpoints (sorted) ===" )
@@ -115,23 +119,27 @@ def piecewise_poly_approx_bst_fp16(
115119 dbg .append (f" y_approx = { y_approx [idx ]:.5f} ({ float16_to_hex (y_approx [idx ])} )" )
116120 dbg .append (f" error = { float (y_true [idx ] - y_approx [idx ]):.5f} " )
117121 debug_lines .append ("\n " .join (dbg ) + "\n " )
122+ if idx % 16 == 0 :
123+ custom_debug_lines .append ("\n " .join (dbg ))
118124
119125 return {
120126 "x_vals" : x_vals ,
121127 "y_true" : y_true ,
122128 "y_approx" : y_approx ,
123129 "breakpoints_bst" : breakpoints_bst ,
124130 "coeffs" : coeffs ,
125- "debug_lines" : debug_lines
131+ "debug_lines" : debug_lines ,
132+ "custom_debug_lines" : custom_debug_lines
126133 }
127134
128135
def write_debug_output(results, debug_file="execution.txt"):
    """Write debug text to ``debug_file``, one entry per line.

    Args:
        results: Either an iterable of strings to write, or a results dict
            whose ``"debug_lines"`` entry holds them.  The dict form is the
            older call style and is still accepted, so callers that pass the
            whole results dict do not end up writing its keys.
        debug_file: Path of the text file to create or overwrite.
    """
    lines = results["debug_lines"] if isinstance(results, dict) else results
    # Explicit encoding keeps the output identical across platforms.
    with open(debug_file, "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in lines)
    print(f"✅ Debug written to: {debug_file}")
134141
142+
135143def write_coefficients_output (results , coeff_file = "coefficients.txt" ):
136144 with open (coeff_file , "w" ) as f :
137145 for i , coeffs in enumerate (results ["coeffs" ]):
@@ -245,8 +253,8 @@ def write_tensor_dim_inc_file(stimuli_file = "tensor_dim.h", n_tests=1000):
245253 f_d .write ('#ifndef __TENSOR_DIM__\n ' )
246254 f_d .write ('#define __TENSOR_DIM__\n \n ' )
247255 f_d .write ('#define M_SIZE 8 \n ' )
248- f_d .write ('#define N_SIZE 32 \n ' )
249- f_d .write (f'#define K_SIZE { n_tests / 8 } \n ' )
256+ f_d .write ('#define N_SIZE 64 \n ' )
257+ f_d .write (f'#define K_SIZE { n_tests / 32 } \n ' )
250258 f_d .write ('#define SRC_FMT FP16\n ' )
251259 f_d .write ('#define DST_FMT FP16\n ' )
252260 f_d .write ('#define FPFORMAT 16\n ' )
@@ -258,27 +266,28 @@ def write_tensor_dim_inc_file(stimuli_file = "tensor_dim.h", n_tests=1000):
if __name__ == "__main__":
    import argparse

    # Target functions selectable through --f_name.
    target_funcs = {"silu": silu, "exp": exp}

    parser = argparse.ArgumentParser("PACE Operation Test")
    parser.add_argument('--x_min', type=int, default=-11)
    parser.add_argument('--x_max', type=int, default=0)
    # Default is "exp" so that a run without arguments keeps approximating
    # exp over [x_min, x_max], as the previously hard-coded call did.
    parser.add_argument('--f_name', type=str, default="exp",
                        choices=sorted(target_funcs))
    parser.add_argument('--n_parts', type=int, default=8)
    parser.add_argument('--n_deg', type=int, default=4)
    parser.add_argument('--n_tests', type=int, default=4096)
    parser.add_argument('--file_name', type=str, default='net_parameters.h')
    # Both directories feed os.path.join below; without them the script
    # would fail with a TypeError, so argparse reports the omission instead.
    parser.add_argument('--inc_dir', type=str, required=True)
    parser.add_argument('--txt_dir', type=str, required=True)
    args = parser.parse_args()

    # Fit the piecewise polynomial using the values given on the command line
    # rather than hard-coded function, degree and partition count.
    results = piecewise_poly_approx_bst_fp16(
        target_funcs[args.f_name],
        xmin=args.x_min,
        xmax=args.x_max,
        degree=args.n_deg,
        partitions=args.n_parts,
        n_stimuli=args.n_tests,
    )

    # Text reports: the full trace plus the reduced "custom" trace.
    write_debug_output(debug_file=os.path.join(args.txt_dir, "execution.txt"),
                       results=results["debug_lines"])
    write_debug_output(debug_file=os.path.join(args.txt_dir, "execution_custom.txt"),
                       results=results["custom_debug_lines"])
    write_coefficients_output(coeff_file=os.path.join(args.txt_dir, "coefficients.txt"),
                              results=results)

    # Generated header files.
    write_inp_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "w_input.h"))
    write_golden_oup_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "golden.h"))
    write_actual_oup_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "z_output.h"))
    write_golden_inc_debug_file(results, stimuli_file=os.path.join(args.txt_dir, "golden_debug.h"))
    write_y_inp_inc_file(stimuli_file=os.path.join(args.inc_dir, "y_input.h"))
    # The partition count must match the one used for the fit above.
    write_x_file(coeffs=results["coeffs"], xmin=args.x_min, xmax=args.x_max,
                 partitions=args.n_parts,
                 stimuli_file=os.path.join(args.inc_dir, "x_input.h"))
    write_tensor_dim_inc_file(stimuli_file=os.path.join(args.inc_dir, "tensor_dim.h"),
                              n_tests=args.n_tests)
283292
284293
0 commit comments