Skip to content

Commit ffdf9c5

Browse files
authored
Add full ELF flow support with load_pdi instruction (#2641)
1 parent 4a55ee7 commit ffdf9c5

File tree

8 files changed

+354
-37
lines changed

8 files changed

+354
-37
lines changed

include/aie/Dialect/AIEX/IR/AIEX.td

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,16 +1028,19 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
10281028
def AIE_NpuLoadPdiOp: AIEX_Op<"npu.load_pdi", []> {
10291029
let summary = "load pdi operator";
10301030
let arguments = (
1031-
ins I32Attr:$id,
1032-
OptionalAttr<I32Attr>:$size,
1033-
OptionalAttr<UI64Attr>:$address
1031+
ins OptionalAttr<FlatSymbolRefAttr>:$device_ref,
1032+
DefaultValuedOptionalAttr<I32Attr, "0">:$id,
1033+
DefaultValuedOptionalAttr<I32Attr, "0">:$size,
1034+
DefaultValuedOptionalAttr<UI64Attr, "0">:$address
10341035
);
10351036
let results = (outs );
10361037
let assemblyFormat = [{ attr-dict }];
10371038
let description = [{
10381039
Load a PDI (Programmable Device Image) to configure the NPU.
10391040
The PDI is identified by `id`. `address` and `size` are typically written at
10401041
runtime by the driver or host program.
1042+
1043+
If a symbol reference is provided, the compiler driver (aiecc.py) will match it to a device symbol name and assign the PDI ID field based on it.
10411044
}];
10421045
}
10431046

lib/Conversion/AIEToConfiguration/AIEToConfiguration.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,8 @@ emitTransactionOps(OpBuilder &builder,
498498
IntegerAttr addressAttr =
499499
IntegerAttr::get(ui64Ty, llvm::APInt(64, payloadInfo.address));
500500

501-
builder.create<AIEX::NpuLoadPdiOp>(loc, idAttr, sizeAttr, addressAttr);
501+
builder.create<AIEX::NpuLoadPdiOp>(loc, nullptr, idAttr, sizeAttr,
502+
addressAttr);
502503
} else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_DDR_PATCH) {
503504
if (!op.addressPatch) {
504505
llvm::errs()

python/compiler/aiecc/cl_arguments.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,19 @@ def parse_args(args=None):
374374
default="",
375375
help="Symbol name of the runtime sequence to compile. If none supplied, all runtime sequences in the selected device(s) are compiled.",
376376
)
377+
parser.add_argument(
378+
"--generate-full-elf",
379+
dest="generate_full_elf",
380+
default=False,
381+
action="store_true",
382+
help="Generate complete full ELF using aiebu-asm",
383+
)
384+
parser.add_argument(
385+
"--full-elf-name",
386+
dest="full_elf_name",
387+
default="aie.elf",
388+
help="Output filename for full ELF (default: aie.elf)",
389+
)
377390

378391
opts = parser.parse_args(args)
379392
return opts

python/compiler/aiecc/main.py

Lines changed: 177 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,16 @@
3232
import aie.compiler.aiecc.configure
3333
from aie.dialects import aie as aiedialect
3434
from aie.dialects import aiex as aiexdialect
35-
from aie.ir import Context, Location, Module, InsertionPoint, IndexType, StringAttr
35+
from aie.ir import (
36+
Context,
37+
Location,
38+
Module,
39+
InsertionPoint,
40+
IndexType,
41+
StringAttr,
42+
IntegerAttr,
43+
IntegerType,
44+
)
3645
from aie.passmanager import PassManager
3746

3847

@@ -319,6 +328,58 @@ def generate_runtime_sequences_list(device_op):
319328
]
320329

321330

331+
def find_aiebu_asm():
    """Locate the aiebu-asm assembler executable.

    Tries the bare command name first (resolved through PATH by
    shutil.which), then a fixed path under /opt/xilinx/aiebu/bin.

    Returns:
        The first candidate string that shutil.which can resolve. The
        candidate itself is returned, not the resolved path.

    Exits the process with status 1, after printing an error to stderr,
    when neither candidate can be resolved.
    """
    candidates = (
        "aiebu-asm",
        os.path.join("/", "opt", "xilinx", "aiebu", "bin", "aiebu-asm"),
    )
    for candidate in candidates:
        if shutil.which(candidate) is not None:
            return candidate

    print(
        "Error: aiebu-asm not found.",
        file=sys.stderr,
    )
    sys.exit(1)
344+
345+
346+
def create_device_id_mapping(devices):
    """Number the devices 1..N in iteration order, keyed by device name.

    Args:
        devices: iterable of (device_op, device_name) pairs. Only the name
            is used here.

    Returns:
        dict mapping each device name to its 1-based position. The IDs are
        used later as PDI IDs. If a name occurs more than once, the last
        position wins.
    """
    return {name: index for index, (_, name) in enumerate(devices, start=1)}
352+
353+
354+
def assign_load_pdi_ids(mlir_module_str, device_to_id_mapping):
    """Transform symbolic aiex.npu.load_pdi references to numeric IDs.

    Parses the module text, visits every NpuLoadPdiOp nested in a
    RuntimeSequenceOp that carries a `device_ref`, and sets the op's `id`
    attribute to the numeric ID registered for the referenced device.

    Args:
        mlir_module_str: textual MLIR module to transform.
        device_to_id_mapping: dict from device symbol name to PDI ID.

    Returns:
        The transformed module printed back to a string.

    Exits the process with status 1 if a load_pdi op references a device
    name that is not in `device_to_id_mapping`.
    """
    with Context() as context, Location.unknown():
        module = Module.parse(mlir_module_str)

        for runtime_seq in find_ops(
            module.operation,
            lambda o: isinstance(o.operation.opview, aiexdialect.RuntimeSequenceOp),
        ):
            for load_pdi_op in find_ops(
                runtime_seq.operation,
                lambda o: isinstance(o.operation.opview, aiexdialect.NpuLoadPdiOp)
                and hasattr(o, "device_ref")
                and o.device_ref is not None,
            ):
                device_name = load_pdi_op.device_ref.value
                if device_name not in device_to_id_mapping:
                    # This condition is fatal, so report it as an error on
                    # stderr (as find_aiebu_asm does) rather than as a
                    # "Warning" on stdout.
                    print(
                        f"Error: Device '{device_name}' for load_pdi instruction does not have a matching device PDI.",
                        file=sys.stderr,
                    )
                    sys.exit(1)
                pdi_id = device_to_id_mapping[device_name]
                # The `id` attribute is written as a signless 32-bit integer.
                load_pdi_op.id = IntegerAttr.get(
                    IntegerType.get_signless(32, context=context), pdi_id
                )

        return str(module)
381+
382+
322383
def set_elf_file_for_core(core, path):
323384
with InsertionPoint.at_block_terminator(
324385
core.parent.regions[0].blocks[0]
@@ -493,6 +554,20 @@ def __init__(self, mlir_module_str, opts, tmpdirname):
493554
def prepend_tmp(self, x):
494555
return os.path.join(self.tmpdirname, x)
495556

557+
def pdi_file_name(self, device_name):
    """Return the path at which the PDI for `device_name` is written.

    When the `pdi` option is set, the path is the `pdi_name` option
    formatted with the device name. Otherwise it is `<device_name>.pdi`
    inside the temporary directory.

    Reads the options from `self.opts`, as the other methods of this class
    do, rather than from a module-level `opts` variable.
    """
    if self.opts.pdi:
        return self.opts.pdi_name.format(device_name)
    return self.prepend_tmp(f"{device_name}.pdi")
563+
564+
def npu_insts_file_name(self, device_name, seq_name):
565+
return (
566+
opts.insts_name.format(device_name, seq_name)
567+
if opts.npu
568+
else self.prepend_tmp(f"{device_name}_{seq_name}.bin")
569+
)
570+
496571
async def do_call(self, task_id, command, force=False):
497572
if self.stopall:
498573
return
@@ -800,20 +875,7 @@ async def process_txn(self, module_str, device_name):
800875
return file_txn
801876

802877
async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
803-
804-
# find aiebu-asm binary
805-
asm_bin = "aiebu-asm"
806-
if shutil.which(asm_bin) is None:
807-
asm_bin = os.path.join("/", "opt", "xilinx", "aiebu", "bin", "aiebu-asm")
808-
if shutil.which(asm_bin) is None:
809-
asm_bin = None
810-
811-
if asm_bin is None:
812-
print(
813-
"Error: aiebu-asm not found, generation of ELF file failed.",
814-
file=sys.stderr,
815-
)
816-
sys.exit(1)
878+
asm_bin = find_aiebu_asm()
817879

818880
args = [
819881
asm_bin,
@@ -849,6 +911,81 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
849911

850912
await self.do_call(None, args)
851913

914+
async def generate_full_elf_config_json(
    self, devices, device_to_id_mapping, opts, parent_task=None
):
    """Build the configuration dictionary passed to aiebu-asm for a full ELF.

    One entry is added under "xrt-kernels" per device. Each entry lists the
    device's PDI (ID and file) and one "instance" per runtime sequence,
    pointing at that sequence's instruction file.

    Args:
        devices: iterable of (device_op, device_name) pairs.
        device_to_id_mapping: dict from device name to PDI ID.
        opts: accepted for interface compatibility; not used here.
        parent_task: accepted for interface compatibility; not used here.

    Returns:
        The configuration as a plain dict, ready for json.dumps.
    """
    config = {"xrt-kernels": []}

    for device_op, device_name in devices:
        sequences = generate_runtime_sequences_list(device_op)

        # The kernel argument list is sized for the runtime sequence with
        # the most block arguments. default=0 lets a device with no runtime
        # sequences produce an empty list instead of raising ValueError.
        max_arg_count = max(
            (len(seq_op.body.blocks[0].arguments) for seq_op, _ in sequences),
            default=0,
        )
        # Each argument is described as a "char *" at an 8-byte stride.
        arguments = [
            {"name": f"arg_{i}", "type": "char *", "offset": hex(i * 8)}
            for i in range(max_arg_count)
        ]

        kernel_entry = {
            "name": device_name,
            "arguments": arguments,
            "instance": [],
            "PDIs": [],
        }

        pdi_id = device_to_id_mapping[device_name]
        pdi_filename = self.pdi_file_name(device_name)
        kernel_entry["PDIs"].append({"id": pdi_id, "PDI_file": pdi_filename})

        for _, seq_name in sequences:
            insts_filename = self.npu_insts_file_name(device_name, seq_name)
            kernel_entry["instance"].append(
                {"id": seq_name, "TXN_ctrl_code_file": insts_filename}
            )

        config["xrt-kernels"].append(kernel_entry)

    return config
950+
951+
async def assemble_full_elf(
    self, config_json_path, output_elf_path, parent_task=None
):
    """Invoke aiebu-asm with target `aie2_config` to build the full ELF.

    Args:
        config_json_path: JSON configuration file passed with `-j`.
        output_elf_path: output ELF file passed with `-o`.
        parent_task: forwarded to `do_call` as the task id.
    """
    command = [find_aiebu_asm(), "-t", "aie2_config"]
    command += ["-j", config_json_path]
    command += ["-o", output_elf_path]

    await self.do_call(parent_task, command)

    if not self.opts.verbose:
        return
    print(f"Generated full ELF: {output_elf_path}")
967+
968+
async def generate_full_elf(self, devices, device_to_id_mapping, parent_task=None):
    """Generate config.json and invoke aiebu-asm after all artifacts are ready.

    Writes the configuration to `config.json` in the temporary directory,
    then assembles the ELF at the `full_elf_name` option, falling back to
    "aie.elf" when that option is empty.

    Args:
        devices: iterable of (device_op, device_name) pairs.
        device_to_id_mapping: dict from device name to PDI ID.
        parent_task: optional progress-bar task to update while working.
    """
    if parent_task:
        self.progress_bar.update(
            parent_task, advance=0, command="Generating config.json"
        )
    config = await self.generate_full_elf_config_json(
        devices, device_to_id_mapping, self.opts, parent_task
    )
    config_json_path = self.prepend_tmp("config.json")
    await write_file_async(json.dumps(config, indent=2), config_json_path)
    # A single verbosity check is enough; the original nested an identical
    # `if self.opts.verbose:` inside this one.
    if self.opts.verbose:
        print(f"Generated config.json: {config_json_path}")
    if parent_task:
        self.progress_bar.update(
            parent_task, advance=1, command="Generating config.json"
        )
    full_elf_path = self.opts.full_elf_name or "aie.elf"
    await self.assemble_full_elf(config_json_path, full_elf_path, parent_task)
988+
852989
async def process_ctrlpkt(self, module_str, device_name):
853990
with Context(), Location.unknown():
854991
file_ctrlpkt_mlir = self.prepend_tmp(f"{device_name}_ctrlpkt.mlir")
@@ -935,11 +1072,6 @@ async def process_pdi_gen(self, device_name, file_design_pdi):
9351072
"-w",
9361073
],
9371074
)
938-
if opts.pdi:
939-
tmp = file_design_pdi
940-
if opts.verbose:
941-
print(f"copy {tmp} to {opts.pdi_name.format(device_name)}")
942-
shutil.copy(tmp, opts.pdi_name.format(device_name))
9431075

9441076
# generate an xclbin. The inputs are self.mlir_module_str and the cdo
9451077
# binaries from the process_cdo step.
@@ -954,7 +1086,7 @@ async def process_xclbin_gen(self, device_op, device_name):
9541086
f"{device_name}_aie_input_partition.json"
9551087
)
9561088
file_kernels = self.prepend_tmp(f"{device_name}_kernels.json")
957-
file_pdi = self.prepend_tmp(f"{device_name}_design.pdi")
1089+
file_pdi = self.pdi_file_name(device_name)
9581090

9591091
# collect the tasks to generate the inputs to xclbinutil
9601092
processes = []
@@ -1443,6 +1575,15 @@ async def run_flow(self):
14431575
sys.exit(1)
14441576
aie_target, aie_peano_target = aie_targets[0], aie_peano_targets[0]
14451577

1578+
# Handle full ELF generation configuration
1579+
if opts.generate_full_elf:
1580+
device_to_id_mapping = create_device_id_mapping(devices)
1581+
self.mlir_module_str = assign_load_pdi_ids(
1582+
self.mlir_module_str, device_to_id_mapping
1583+
)
1584+
transformed_mlir_path = self.prepend_tmp("input_with_pdi_ids.mlir")
1585+
await write_file_async(self.mlir_module_str, transformed_mlir_path)
1586+
14461587
pass_pipeline = INPUT_WITH_ADDRESSES_PIPELINE(
14471588
opts.alloc_scheme,
14481589
opts.dynamic_objFifos,
@@ -1524,7 +1665,7 @@ async def run_flow(self):
15241665
# 3.) Targets that require the cores to be lowered but apply across all devices
15251666

15261667
npu_insts_module = None
1527-
if opts.npu or opts.elf:
1668+
if opts.npu or opts.elf or opts.generate_full_elf:
15281669
task3 = progress_bar.add_task(
15291670
"[green] Lowering NPU instructions", total=2, command=""
15301671
)
@@ -1566,6 +1707,13 @@ async def run_flow(self):
15661707
task4,
15671708
)
15681709

1710+
self.maxtasks = 2
1711+
task5 = progress_bar.add_task(
1712+
"[green] Creating full ELF", total=2, command=""
1713+
)
1714+
if opts.generate_full_elf:
1715+
await self.generate_full_elf(devices, device_to_id_mapping, task5)
1716+
15691717
async def run_flow_for_device(
15701718
self,
15711719
input_physical,
@@ -1581,7 +1729,7 @@ async def run_flow_for_device(
15811729
nworkers = int(opts.nthreads)
15821730

15831731
# Optionally generate insts.bin for NPU instruction stream
1584-
if opts.npu:
1732+
if opts.npu or opts.generate_full_elf:
15851733
# write each runtime sequence binary into its own file
15861734
runtime_sequences = generate_runtime_sequences_list(device_op)
15871735
for seq_op, seq_name in runtime_sequences:
@@ -1592,7 +1740,7 @@ async def run_flow_for_device(
15921740
npu_insts = aiedialect.translate_npu_to_binary(
15931741
npu_insts_module.operation, device_name, seq_name
15941742
)
1595-
npu_insts_path = opts.insts_name.format(device_name, seq_name)
1743+
npu_insts_path = self.npu_insts_file_name(device_name, seq_name)
15961744
with open(npu_insts_path, "wb") as f:
15971745
f.write(struct.pack("I" * len(npu_insts), *npu_insts))
15981746
pb.update(parent_task_id, advance=1)
@@ -1625,18 +1773,18 @@ async def run_flow_for_device(
16251773

16261774
input_physical_with_elfs_str = await read_file_async(input_physical_with_elfs)
16271775

1628-
if (opts.cdo or opts.xcl or opts.pdi) and opts.execute:
1776+
if (
1777+
opts.cdo or opts.xcl or opts.pdi or opts.generate_full_elf
1778+
) and opts.execute:
16291779
await self.process_cdo(input_physical_with_elfs_str, device_name)
16301780

16311781
if opts.xcl:
16321782
processes.append(self.process_xclbin_gen(device_op, device_name))
16331783
# self.process_pdi_gen is called in process_xclbin_gen,
16341784
# so don't call it again if opts.xcl is set
1635-
elif opts.pdi:
1785+
elif opts.pdi or opts.generate_full_elf:
16361786
processes.append(
1637-
self.process_pdi_gen(
1638-
device_name, self.prepend_tmp(f"{device_name}_design.pdi")
1639-
)
1787+
self.process_pdi_gen(device_name, self.pdi_file_name(device_name))
16401788
)
16411789

16421790
if opts.txn and opts.execute:

test/aiecc/generate_pdi.mlir

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,10 @@
1616

1717
// RUN: ls | grep MlirAie | FileCheck %s --check-prefix=CHECK-FILE
1818

19-
// XCHESSCC: bootgen
20-
// XCHESSCC: copy{{.*}} to MlirAie0.pdi
19+
// XCHESSCC: bootgen {{.*}} MlirAie0.pdi
2120
// XCHESSCC-NOT: xclbinutil
2221

23-
// PEANO: bootgen
24-
// PEANO: copy{{.*}} to MlirAie1.pdi
22+
// PEANO: bootgen {{.*}} MlirAie1.pdi
2523
// PEANO-NOT: xclbinutil
2624

2725
// CHECK-FILE: MlirAie0.pdi

0 commit comments

Comments
 (0)