66#
77
88import numpy as np
9- import os
9+ import os
1010
1111def float16_to_hex (f16_val ):
1212 arr = np .array (f16_val , dtype = np .float16 ).reshape (())
@@ -15,6 +15,9 @@ def float16_to_hex(f16_val):
def silu(x):
    """Return the SiLU activation of ``x``, computed as ``x / (1 + exp(-x))``.

    The expression is built from NumPy ufuncs, so ``x`` may be a scalar or
    an array; arrays are processed elementwise.
    """
    return x / (1 + np.exp(-x))
1717
def exp(x):
    """Return ``e**x`` via ``np.exp`` (elementwise for array input).

    Thin named wrapper so the exponential can be passed around as a plain
    module-level function, the same way ``silu`` is.
    """
    return np.exp(x)
20+
1821# === BST Partitioning ===
1922
2023def build_bst_indices (n_partitions ):
@@ -61,7 +64,7 @@ def piecewise_poly_approx_bst_fp16(
6164
6265 coeffs = []
6366 for i in range (partitions ):
64- x = np .linspace (raw_bps [i ], raw_bps [i + 1 ], 50 ).astype (np .float16 )
67+ x = np .linspace (raw_bps [i ], raw_bps [i + 1 ], 500 ).astype (np .float16 )
6568 y = func (x .astype (np .float32 )).astype (np .float16 )
6669 p = np .polynomial .Polynomial .fit (x .astype (np .float32 ), y .astype (np .float32 ), deg = degree ,
6770 domain = [float (raw_bps [i ]), float (raw_bps [i + 1 ])])
@@ -73,6 +76,7 @@ def piecewise_poly_approx_bst_fp16(
7376 y_approx = np .zeros_like (x_vals )
7477
7578 debug_lines = []
79+ custom_debug_lines = []
7680
7781 # breakpoint layout
7882 debug_lines .append ("=== Raw Breakpoints (sorted) ===" )
@@ -115,23 +119,27 @@ def piecewise_poly_approx_bst_fp16(
115119 dbg .append (f" y_approx = { y_approx [idx ]:.5f} ({ float16_to_hex (y_approx [idx ])} )" )
116120 dbg .append (f" error = { float (y_true [idx ] - y_approx [idx ]):.5f} " )
117121 debug_lines .append ("\n " .join (dbg ) + "\n " )
122+ if idx % 16 == 0 :
123+ custom_debug_lines .append ("\n " .join (dbg ))
118124
119125 return {
120126 "x_vals" : x_vals ,
121127 "y_true" : y_true ,
122128 "y_approx" : y_approx ,
123129 "breakpoints_bst" : breakpoints_bst ,
124130 "coeffs" : coeffs ,
125- "debug_lines" : debug_lines
131+ "debug_lines" : debug_lines ,
132+ "custom_debug_lines" : custom_debug_lines
126133 }
127134
128135
def write_debug_output(results, debug_file="execution.txt"):
    """Write debug text entries to ``debug_file``, one entry per line.

    Args:
        results: Iterable of strings to write. For backward compatibility
            with callers that still pass the whole results dict, a dict is
            also accepted and its ``"debug_lines"`` entry is written.
        debug_file: Path of the text file to create (overwritten if it
            already exists).
    """
    # Older call sites passed the full results dict; newer ones pass the
    # list of lines directly. Support both.
    lines = results["debug_lines"] if isinstance(results, dict) else results
    with open(debug_file, "w") as f:
        f.writelines(line + "\n" for line in lines)
    print(f"✅ Debug written to: {debug_file}")
134141
142+
135143def write_coefficients_output (results , coeff_file = "coefficients.txt" ):
136144 with open (coeff_file , "w" ) as f :
137145 for i , coeffs in enumerate (results ["coeffs" ]):
@@ -176,18 +184,18 @@ def write_x_file(coeffs, xmin=-6, xmax=6, partitions=8, stimuli_file="x_input.h"
176184 f_x .write ('};\n ' )
177185 print (f"✅ x_input header written to: { stimuli_file } " )
178186
179-
187+
180188
def write_inp_inc_file(results, stimuli_file="w_input.h"):
    """Write ``results["x_vals"]`` as a C ``uint16_t w_inp[]`` initializer.

    Each value is rendered with ``float16_to_hex`` and the values are laid
    out eight per line.

    Args:
        results: Mapping with an ``"x_vals"`` sequence of values accepted by
            ``float16_to_hex``.
        stimuli_file: Path of the header file to create (overwritten if it
            already exists).
    """
    x_vals = results["x_vals"]
    size = len(x_vals)
    with open(stimuli_file, "w") as f:
        f.write(f' uint16_t w_inp [{size}] =' + '{')
        for i, x in enumerate(x_vals):
            # Start a new row every eight entries.
            if i % 8 == 0:
                f.write('\n')
            # The last entry ends the row instead of taking a trailing comma.
            if i == size - 1:
                f.write(f" {float16_to_hex(x)}\n")
            else:
                f.write(f" {float16_to_hex(x)},")
        f.write('};\n')
    print(f"✅ Stimuli header written to: {stimuli_file}")
@@ -199,8 +207,8 @@ def write_golden_oup_inc_file(results, stimuli_file="golden.h"):
199207 with open (stimuli_file , "w" ) as f :
200208 f .write (f'uint32_t golden[{ size } ] = {{\n ' )
201209 for i in range (0 , len (y_approx ), 2 ):
202- low_16 = float16_to_hex (y_approx [i ]).removeprefix ("0x" )
203- high_16 = float16_to_hex (y_approx [i + 1 ]).removeprefix ("0x" )
210+ low_16 = float16_to_hex (y_approx [i ]).replace ("0x" , " " )
211+ high_16 = float16_to_hex (y_approx [i + 1 ]).replace ("0x" , " " )
204212 combined = f"0x{ high_16 } { low_16 } "
205213 end_char = ',\n ' if i < len (y_approx ) - 2 else '\n '
206214 f .write (f"{ combined } { end_char } " )
@@ -209,7 +217,7 @@ def write_golden_oup_inc_file(results, stimuli_file="golden.h"):
209217
210218def write_golden_inc_debug_file (results , stimuli_file = "golden_debug.h" ):
211219 y_approx = results ["y_approx" ]
212- size = len (y_approx )
220+ size = len (y_approx )
213221
214222 with open (stimuli_file , "w" ) as f :
215223 f .write (f'uint32_t golden[{ size } ] = {{' )
@@ -245,40 +253,42 @@ def write_tensor_dim_inc_file(stimuli_file = "tensor_dim.h", n_tests=1000):
245253 f_d .write ('#ifndef __TENSOR_DIM__\n ' )
246254 f_d .write ('#define __TENSOR_DIM__\n \n ' )
247255 f_d .write ('#define M_SIZE 8 \n ' )
248- f_d .write ('#define N_SIZE 32 \n ' )
249- f_d .write (f'#define K_SIZE { n_tests / 8 } \n ' )
256+ f_d .write ('#define N_SIZE 64 \n ' )
257+ f_d .write (f'#define K_SIZE { n_tests / 32 } \n ' )
250258 f_d .write ('#define SRC_FMT FP16\n ' )
251259 f_d .write ('#define DST_FMT FP16\n ' )
252260 f_d .write ('#define FPFORMAT 16\n ' )
253261 f_d .write ('uint8_t gemm_ops = PACE; \n ' )
262+ f_d .write ('uint8_t quant_fmt = 0; \n ' )
254263 f_d .write ('\n #endif\n ' )
255264 f_d .close ()
256265
257266
if __name__ == "__main__":
    # Script entry point: fit the piecewise-polynomial approximation and
    # emit the debug text files and C headers used by the PACE test.
    import argparse

    parser = argparse.ArgumentParser("PACE Operation Test")
    parser.add_argument('--x_min', type=int, default=-11)
    parser.add_argument('--x_max', type=int, default=0)
    # NOTE(review): --f_name is parsed but the approximated function is
    # still hard-coded to `exp` below; its default ("silu") does not match.
    parser.add_argument('--f_name', type=str, default="silu")
    parser.add_argument('--n_parts', type=int, default=8)
    parser.add_argument('--n_deg', type=int, default=4)
    parser.add_argument('--n_tests', type=int, default=4096)
    # NOTE(review): --file_name is parsed but not used in this block.
    parser.add_argument('--file_name', type=str, default='net_parameters.h')
    # Default to the current directory so os.path.join() does not receive
    # None when the output directories are omitted.
    parser.add_argument('--inc_dir', type=str, default=".")
    parser.add_argument('--txt_dir', type=str, default=".")
    args = parser.parse_args()

    # Degree and partition count come from the CLI; their defaults (4 and 8)
    # equal the values that were previously hard-coded here.
    results = piecewise_poly_approx_bst_fp16(
        exp, xmin=args.x_min, xmax=args.x_max, degree=args.n_deg,
        partitions=args.n_parts, n_stimuli=args.n_tests
    )

    # Text/debug artifacts.
    write_debug_output(debug_file=os.path.join(args.txt_dir, "execution.txt"), results=results["debug_lines"])
    write_debug_output(debug_file=os.path.join(args.txt_dir, "execution_custom.txt"), results=results["custom_debug_lines"])
    write_coefficients_output(coeff_file=os.path.join(args.txt_dir, "coefficients.txt"), results=results)

    # C header artifacts.
    write_inp_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "w_input.h"))
    write_golden_oup_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "golden.h"))
    write_actual_oup_inc_file(results, stimuli_file=os.path.join(args.inc_dir, "z_output.h"))
    write_golden_inc_debug_file(results, stimuli_file=os.path.join(args.txt_dir, "golden_debug.h"))
    write_y_inp_inc_file(stimuli_file=os.path.join(args.inc_dir, "y_input.h"))
    write_x_file(coeffs=results["coeffs"], xmin=args.x_min, xmax=args.x_max, partitions=args.n_parts, stimuli_file=os.path.join(args.inc_dir, "x_input.h"))
    write_tensor_dim_inc_file(stimuli_file=os.path.join(args.inc_dir, "tensor_dim.h"), n_tests=args.n_tests)
283293
284294
0 commit comments