3232import aie .compiler .aiecc .configure
3333from aie .dialects import aie as aiedialect
3434from aie .dialects import aiex as aiexdialect
35- from aie .ir import Context , Location , Module , InsertionPoint , IndexType , StringAttr
35+ from aie .ir import (
36+ Context ,
37+ Location ,
38+ Module ,
39+ InsertionPoint ,
40+ IndexType ,
41+ StringAttr ,
42+ IntegerAttr ,
43+ IntegerType ,
44+ )
3645from aie .passmanager import PassManager
3746
3847
@@ -319,6 +328,58 @@ def generate_runtime_sequences_list(device_op):
319328 ]
320329
321330
def find_aiebu_asm():
    """Locate the ``aiebu-asm`` assembler executable.

    Two candidates are probed with ``shutil.which`` in order: the bare
    command name ``aiebu-asm``, then the fixed location
    ``/opt/xilinx/aiebu/bin/aiebu-asm``.

    Returns:
        The first candidate string that ``shutil.which`` could resolve. The
        candidate itself is returned, not the resolved path.

    If neither candidate resolves, an error is printed to stderr and the
    process exits with status 1.
    """
    fallback = os.path.join("/", "opt", "xilinx", "aiebu", "bin", "aiebu-asm")
    for candidate in ("aiebu-asm", fallback):
        if shutil.which(candidate) is not None:
            return candidate

    print(
        "Error: aiebu-asm not found.",
        file=sys.stderr,
    )
    sys.exit(1)
344+
345+
def create_device_id_mapping(devices):
    """Number the devices starting at 1 and return a name -> ID dict.

    Args:
        devices: Iterable of ``(device_op, device_name)`` pairs. Only the
            name is used; the op is ignored.

    Returns:
        Dict mapping each device name to its 1-based position in
        ``devices``. If a name occurs more than once, the last position wins.
    """
    return {
        name: position
        for position, (_op, name) in enumerate(devices, start=1)
    }
352+
353+
def assign_load_pdi_ids(mlir_module_str, device_to_id_mapping):
    """Set a numeric ``id`` on every ``aiex.npu.load_pdi`` op that names a device.

    The module text is parsed in a fresh MLIR context. Each runtime sequence
    is searched for ``NpuLoadPdiOp`` operations that carry a ``device_ref``.
    For each one, the referenced device name is looked up in
    ``device_to_id_mapping`` and stored on the op as a signless 32-bit
    integer attribute.

    Args:
        mlir_module_str: Textual MLIR module.
        device_to_id_mapping: Dict mapping device name to integer PDI ID.

    Returns:
        The textual form of the module after the ``id`` attributes are set.

    If a ``load_pdi`` op references a device missing from the mapping, an
    error is printed to stderr and the process exits with status 1.
    """
    with Context() as context, Location.unknown():
        module = Module.parse(mlir_module_str)
        i32 = IntegerType.get_signless(32, context=context)

        for runtime_seq in find_ops(
            module.operation,
            lambda o: isinstance(o.operation.opview, aiexdialect.RuntimeSequenceOp),
        ):
            for load_pdi_op in find_ops(
                runtime_seq.operation,
                lambda o: isinstance(o.operation.opview, aiexdialect.NpuLoadPdiOp)
                and hasattr(o, "device_ref")
                and o.device_ref is not None,
            ):
                device_name = load_pdi_op.device_ref.value
                if device_name not in device_to_id_mapping:
                    # This condition is fatal, so label it as an error and
                    # send it to stderr, as the other fatal diagnostics do.
                    print(
                        f"Error: Device '{device_name}' for load_pdi instruction does not have a matching device PDI.",
                        file=sys.stderr,
                    )
                    sys.exit(1)
                load_pdi_op.id = IntegerAttr.get(
                    i32, device_to_id_mapping[device_name]
                )

        return str(module)
381+
382+
322383def set_elf_file_for_core (core , path ):
323384 with InsertionPoint .at_block_terminator (
324385 core .parent .regions [0 ].blocks [0 ]
@@ -493,6 +554,20 @@ def __init__(self, mlir_module_str, opts, tmpdirname):
493554 def prepend_tmp (self , x ):
494555 return os .path .join (self .tmpdirname , x )
495556
def pdi_file_name(self, device_name):
    """Return the path of the PDI file for ``device_name``.

    When the ``pdi`` option is set, the path is built from the
    ``pdi_name`` option, formatted with the device name. Otherwise the PDI
    goes into the temporary directory as ``<device_name>.pdi``.

    The options are read from ``self.opts``, as sibling methods do, so the
    method does not depend on a module-level ``opts`` global being defined.
    """
    if self.opts.pdi:
        return self.opts.pdi_name.format(device_name)
    return self.prepend_tmp(f"{device_name}.pdi")
563+
def npu_insts_file_name(self, device_name, seq_name):
    """Return the path of the NPU instruction binary for one runtime sequence.

    When the ``npu`` option is set, the path is built from the
    ``insts_name`` option, formatted with the device name and the sequence
    name. Otherwise the binary goes into the temporary directory as
    ``<device_name>_<seq_name>.bin``.

    The options are read from ``self.opts``, as sibling methods do, so the
    method does not depend on a module-level ``opts`` global being defined.
    """
    if self.opts.npu:
        return self.opts.insts_name.format(device_name, seq_name)
    return self.prepend_tmp(f"{device_name}_{seq_name}.bin")
570+
496571 async def do_call (self , task_id , command , force = False ):
497572 if self .stopall :
498573 return
@@ -800,20 +875,7 @@ async def process_txn(self, module_str, device_name):
800875 return file_txn
801876
802877 async def aiebu_asm (self , input_file , output_file , ctrl_packet_file = None ):
803-
804- # find aiebu-asm binary
805- asm_bin = "aiebu-asm"
806- if shutil .which (asm_bin ) is None :
807- asm_bin = os .path .join ("/" , "opt" , "xilinx" , "aiebu" , "bin" , "aiebu-asm" )
808- if shutil .which (asm_bin ) is None :
809- asm_bin = None
810-
811- if asm_bin is None :
812- print (
813- "Error: aiebu-asm not found, generation of ELF file failed." ,
814- file = sys .stderr ,
815- )
816- sys .exit (1 )
878+ asm_bin = find_aiebu_asm ()
817879
818880 args = [
819881 asm_bin ,
@@ -849,6 +911,81 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
849911
850912 await self .do_call (None , args )
851913
async def generate_full_elf_config_json(
    self, devices, device_to_id_mapping, opts, parent_task=None
):
    """Build the configuration dict that describes the full ELF.

    One entry is appended to ``"xrt-kernels"`` per device. Each entry has:

    * ``"name"``: the device name.
    * ``"arguments"``: one ``char *`` slot per argument, sized to the
      runtime sequence with the most block arguments, at offsets
      ``0x0, 0x8, ...``.
    * ``"PDIs"``: a single item pairing the device's ID with its PDI file.
    * ``"instance"``: one item per runtime sequence, pairing the sequence
      name with its instruction file.

    Args:
        devices: Iterable of ``(device_op, device_name)`` pairs.
        device_to_id_mapping: Dict mapping device name to PDI ID.
        opts: Unused here; kept for interface compatibility.
        parent_task: Unused here; kept for interface compatibility.

    Returns:
        The configuration as a plain dict. Writing it out is left to the
        caller.
    """
    config = {"xrt-kernels": []}

    for device_op, device_name in devices:
        sequences = generate_runtime_sequences_list(device_op)

        # default=0 keeps a device without runtime sequences from raising
        # ValueError; it simply gets an empty argument list.
        max_arg_count = max(
            (len(seq_op.body.blocks[0].arguments) for seq_op, _ in sequences),
            default=0,
        )
        arguments = [
            {"name": f"arg_{i}", "type": "char *", "offset": hex(i * 8)}
            for i in range(max_arg_count)
        ]

        instances = [
            {
                "id": seq_name,
                "TXN_ctrl_code_file": self.npu_insts_file_name(
                    device_name, seq_name
                ),
            }
            for _, seq_name in sequences
        ]

        config["xrt-kernels"].append(
            {
                "name": device_name,
                "arguments": arguments,
                "instance": instances,
                "PDIs": [
                    {
                        "id": device_to_id_mapping[device_name],
                        "PDI_file": self.pdi_file_name(device_name),
                    }
                ],
            }
        )

    return config
950+
async def assemble_full_elf(
    self, config_json_path, output_elf_path, parent_task=None
):
    """Run ``aiebu-asm`` in ``aie2_config`` mode to produce the full ELF.

    Args:
        config_json_path: JSON configuration file, passed via ``-j``.
        output_elf_path: Destination ELF file, passed via ``-o``.
        parent_task: Task id forwarded to ``do_call``.
    """
    command = [find_aiebu_asm(), "-t", "aie2_config"]
    command += ["-j", config_json_path]
    command += ["-o", output_elf_path]

    await self.do_call(parent_task, command)

    if not self.opts.verbose:
        return
    print(f"Generated full ELF: {output_elf_path}")
967+
async def generate_full_elf(self, devices, device_to_id_mapping, parent_task=None):
    """Generate config.json and invoke aiebu-asm after all artifacts are ready.

    The configuration dict is built, written as ``config.json`` in the
    temporary directory, and then assembled into the full ELF. The ELF path
    is the ``full_elf_name`` option, or ``aie.elf`` when that is unset.

    Args:
        devices: Iterable of ``(device_op, device_name)`` pairs.
        device_to_id_mapping: Dict mapping device name to PDI ID.
        parent_task: Optional progress-bar task id. When given, its
            ``command`` text is updated and it is advanced by one step
            once the JSON is written.
    """
    # Compare against None rather than truthiness so that a task id of 0
    # still gets its progress updates.
    # NOTE(review): assumes task ids may be 0, as with rich.progress; confirm.
    has_task = parent_task is not None

    if has_task:
        self.progress_bar.update(
            parent_task, advance=0, command="Generating config.json"
        )
    config = await self.generate_full_elf_config_json(
        devices, device_to_id_mapping, self.opts, parent_task
    )
    config_json_path = self.prepend_tmp("config.json")
    await write_file_async(json.dumps(config, indent=2), config_json_path)
    if self.opts.verbose:
        print(f"Generated config.json: {config_json_path}")
    if has_task:
        self.progress_bar.update(
            parent_task, advance=1, command="Generating config.json"
        )
    full_elf_path = self.opts.full_elf_name or "aie.elf"
    await self.assemble_full_elf(config_json_path, full_elf_path, parent_task)
988+
852989 async def process_ctrlpkt (self , module_str , device_name ):
853990 with Context (), Location .unknown ():
854991 file_ctrlpkt_mlir = self .prepend_tmp (f"{ device_name } _ctrlpkt.mlir" )
@@ -935,11 +1072,6 @@ async def process_pdi_gen(self, device_name, file_design_pdi):
9351072 "-w" ,
9361073 ],
9371074 )
938- if opts .pdi :
939- tmp = file_design_pdi
940- if opts .verbose :
941- print (f"copy { tmp } to { opts .pdi_name .format (device_name )} " )
942- shutil .copy (tmp , opts .pdi_name .format (device_name ))
9431075
9441076 # generate an xclbin. The inputs are self.mlir_module_str and the cdo
9451077 # binaries from the process_cdo step.
@@ -954,7 +1086,7 @@ async def process_xclbin_gen(self, device_op, device_name):
9541086 f"{ device_name } _aie_input_partition.json"
9551087 )
9561088 file_kernels = self .prepend_tmp (f"{ device_name } _kernels.json" )
957- file_pdi = self .prepend_tmp ( f" { device_name } _design.pdi" )
1089+ file_pdi = self .pdi_file_name ( device_name )
9581090
9591091 # collect the tasks to generate the inputs to xclbinutil
9601092 processes = []
@@ -1443,6 +1575,15 @@ async def run_flow(self):
14431575 sys .exit (1 )
14441576 aie_target , aie_peano_target = aie_targets [0 ], aie_peano_targets [0 ]
14451577
1578+ # Handle full ELF generation configuration
1579+ if opts .generate_full_elf :
1580+ device_to_id_mapping = create_device_id_mapping (devices )
1581+ self .mlir_module_str = assign_load_pdi_ids (
1582+ self .mlir_module_str , device_to_id_mapping
1583+ )
1584+ transformed_mlir_path = self .prepend_tmp ("input_with_pdi_ids.mlir" )
1585+ await write_file_async (self .mlir_module_str , transformed_mlir_path )
1586+
14461587 pass_pipeline = INPUT_WITH_ADDRESSES_PIPELINE (
14471588 opts .alloc_scheme ,
14481589 opts .dynamic_objFifos ,
@@ -1524,7 +1665,7 @@ async def run_flow(self):
15241665 # 3.) Targets that require the cores to be lowered but apply across all devices
15251666
15261667 npu_insts_module = None
1527- if opts .npu or opts .elf :
1668+ if opts .npu or opts .elf or opts . generate_full_elf :
15281669 task3 = progress_bar .add_task (
15291670 "[green] Lowering NPU instructions" , total = 2 , command = ""
15301671 )
@@ -1566,6 +1707,13 @@ async def run_flow(self):
15661707 task4 ,
15671708 )
15681709
1710+ self .maxtasks = 2
1711+ task5 = progress_bar .add_task (
1712+ "[green] Creating full ELF" , total = 2 , command = ""
1713+ )
1714+ if opts .generate_full_elf :
1715+ await self .generate_full_elf (devices , device_to_id_mapping , task5 )
1716+
15691717 async def run_flow_for_device (
15701718 self ,
15711719 input_physical ,
@@ -1581,7 +1729,7 @@ async def run_flow_for_device(
15811729 nworkers = int (opts .nthreads )
15821730
15831731 # Optionally generate insts.bin for NPU instruction stream
1584- if opts .npu :
1732+ if opts .npu or opts . generate_full_elf :
15851733 # write each runtime sequence binary into its own file
15861734 runtime_sequences = generate_runtime_sequences_list (device_op )
15871735 for seq_op , seq_name in runtime_sequences :
@@ -1592,7 +1740,7 @@ async def run_flow_for_device(
15921740 npu_insts = aiedialect .translate_npu_to_binary (
15931741 npu_insts_module .operation , device_name , seq_name
15941742 )
1595- npu_insts_path = opts . insts_name . format (device_name , seq_name )
1743+ npu_insts_path = self . npu_insts_file_name (device_name , seq_name )
15961744 with open (npu_insts_path , "wb" ) as f :
15971745 f .write (struct .pack ("I" * len (npu_insts ), * npu_insts ))
15981746 pb .update (parent_task_id , advance = 1 )
@@ -1625,18 +1773,18 @@ async def run_flow_for_device(
16251773
16261774 input_physical_with_elfs_str = await read_file_async (input_physical_with_elfs )
16271775
1628- if (opts .cdo or opts .xcl or opts .pdi ) and opts .execute :
1776+ if (
1777+ opts .cdo or opts .xcl or opts .pdi or opts .generate_full_elf
1778+ ) and opts .execute :
16291779 await self .process_cdo (input_physical_with_elfs_str , device_name )
16301780
16311781 if opts .xcl :
16321782 processes .append (self .process_xclbin_gen (device_op , device_name ))
16331783 # self.process_pdi_gen is called in process_xclbin_gen,
16341784 # so don't call it again if opts.xcl is set
1635- elif opts .pdi :
1785+ elif opts .pdi or opts . generate_full_elf :
16361786 processes .append (
1637- self .process_pdi_gen (
1638- device_name , self .prepend_tmp (f"{ device_name } _design.pdi" )
1639- )
1787+ self .process_pdi_gen (device_name , self .pdi_file_name (device_name ))
16401788 )
16411789
16421790 if opts .txn and opts .execute :
0 commit comments