From 594600c63448332baefba4f4b72ea3dcef2ef740 Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Wed, 26 Nov 2025 22:58:03 -0500 Subject: [PATCH 1/7] immutable tuples Turn immutable tuples into C structs, and keep mutable tuples as C pointers. --- src/finchlite/codegen/c.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/finchlite/codegen/c.py b/src/finchlite/codegen/c.py index f9b72b6d..04101328 100644 --- a/src/finchlite/codegen/c.py +++ b/src/finchlite/codegen/c.py @@ -1035,16 +1035,37 @@ def struct_c_type(fmt: AssemblyStructFType): return new_struct +def struct_c_type_wrapper(fmt: AssemblyStructFType): + """ + C type decider for struct types. Serialization actually ensures that before + crossing the FFI boundary, all serialized structs are structs, not + pointers. + + The reason why we have this method is that ctypes can intelligently infer + whether we are working with a pointer arg type (pass by reference) or a + non-pointer type (pass by value) + """ + t = struct_c_type(fmt) + if fmt.is_mutable: + return ctypes.POINTER(t) + else: + return t + + register_property( AssemblyStructFType, "c_type", "__attr__", - lambda fmt: ctypes.POINTER(struct_c_type(fmt)), + struct_c_type_wrapper, ) def struct_c_getattr(fmt: AssemblyStructFType, ctx, obj, attr): - return f"{obj}->{attr}" + if fmt.is_mutable: + # we are passing things in as a pointer (reference c_type_wrapper) + return f"{obj}->{attr}" + else: + return f"{obj}.{attr}" register_property( @@ -1056,8 +1077,10 @@ def struct_c_getattr(fmt: AssemblyStructFType, ctx, obj, attr): def struct_c_setattr(fmt: AssemblyStructFType, ctx, obj, attr, val): - ctx.emit(f"{ctx.feed}{obj}->{attr} = {val};") - return + if fmt.is_mutable: + ctx.emit(f"{ctx.feed}{obj}->{attr} = {val};") + else: + ctx.emit(f"{ctx.feed}{obj}.{attr} = {val};") register_property( @@ -1103,7 +1126,5 @@ def serialize_tuple_to_c(fmt, obj): TupleFType, "c_type", "__attr__", - lambda fmt: ctypes.POINTER( - 
struct_c_type(asm.NamedTupleFType("CTuple", fmt.struct_fields)) - ), + lambda fmt: struct_c_type_wrapper(asm.NamedTupleFType("CTuple", fmt.struct_fields)), ) From 072f6c9e57bf77ac063b6a229ad8e059dd4d0891 Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Wed, 26 Nov 2025 23:01:36 -0500 Subject: [PATCH 2/7] ruff reasons --- src/finchlite/codegen/c.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/finchlite/codegen/c.py b/src/finchlite/codegen/c.py index 04101328..47561709 100644 --- a/src/finchlite/codegen/c.py +++ b/src/finchlite/codegen/c.py @@ -1048,8 +1048,7 @@ def struct_c_type_wrapper(fmt: AssemblyStructFType): t = struct_c_type(fmt) if fmt.is_mutable: return ctypes.POINTER(t) - else: - return t + return t register_property( @@ -1064,8 +1063,7 @@ def struct_c_getattr(fmt: AssemblyStructFType, ctx, obj, attr): if fmt.is_mutable: # we are passing things in as a pointer (reference c_type_wrapper) return f"{obj}->{attr}" - else: - return f"{obj}.{attr}" + return f"{obj}.{attr}" register_property( From 41134698b3c59c4a1272e3087c7461fe72faaf7b Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Thu, 27 Nov 2025 10:24:51 -0500 Subject: [PATCH 3/7] c.py invocation and serialization errors fixed --- src/finchlite/codegen/c.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/finchlite/codegen/c.py b/src/finchlite/codegen/c.py index 47561709..d67e2aad 100644 --- a/src/finchlite/codegen/c.py +++ b/src/finchlite/codegen/c.py @@ -140,7 +140,7 @@ def construct_from_c(fmt, c_obj): return fmt.construct_from_c(c_obj) try: return query_property(fmt, "construct_from_c", "__attr__", c_obj) - except NotImplementedError: + except AttributeError: return fmt(c_obj) @@ -254,11 +254,9 @@ def __call__(self, *args): self.argtypes, args, serial_args, strict=False ): deserialize_from_c(type_, arg, serial_arg) - if hasattr(self.ret_type, "construct_from_c"): - return construct_from_c(res.ftype, res) if self.ret_type is type(None): return 
None - return self.ret_type(res) + return construct_from_c(self.ret_type, res) class CModule: @@ -315,7 +313,7 @@ def __call__(self, prgm): for func in prgm.funcs: match func: case asm.Function(asm.Variable(func_name, return_t), args, _): - return_t = c_type(return_t) + #return_t = c_type(return_t) arg_ts = [arg.result_format for arg in args] kern = CKernel(getattr(lib, func_name), return_t, arg_ts) kernels[func_name] = kern @@ -1113,11 +1111,16 @@ def serialize_tuple_to_c(fmt, obj): "__attr__", serialize_tuple_to_c, ) + +def tuple_construct_from_c(fmt: TupleFType, c_struct): + args = [getattr(c_struct, name) for name in fmt.struct_fieldnames] + return tuple(args) + register_property( TupleFType, "construct_from_c", "__attr__", - lambda fmt, obj, c_tuple: tuple(c_tuple), + tuple_construct_from_c, ) register_property( From 7f1431e3da1ec1fce0a1e4f02bcf710c9ac68a2f Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Thu, 27 Nov 2025 11:57:27 -0500 Subject: [PATCH 4/7] ruff format --- src/finchlite/codegen/c.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/finchlite/codegen/c.py b/src/finchlite/codegen/c.py index d67e2aad..ba1d8c60 100644 --- a/src/finchlite/codegen/c.py +++ b/src/finchlite/codegen/c.py @@ -313,7 +313,7 @@ def __call__(self, prgm): for func in prgm.funcs: match func: case asm.Function(asm.Variable(func_name, return_t), args, _): - #return_t = c_type(return_t) + # return_t = c_type(return_t) arg_ts = [arg.result_format for arg in args] kern = CKernel(getattr(lib, func_name), return_t, arg_ts) kernels[func_name] = kern @@ -1112,10 +1112,12 @@ def serialize_tuple_to_c(fmt, obj): serialize_tuple_to_c, ) + def tuple_construct_from_c(fmt: TupleFType, c_struct): args = [getattr(c_struct, name) for name in fmt.struct_fieldnames] return tuple(args) + register_property( TupleFType, "construct_from_c", From 21f281593d15409cbf5202715cc3132478daf2b5 Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Thu, 27 Nov 2025 14:02:00 -0500 Subject: [PATCH 
5/7] numpy serialization also fixed --- src/finchlite/codegen/c.py | 14 +++++++++++++- tests/scripts/safebufferaccess.py | 21 +++++++++++---------- tests/test_codegen.py | 2 +- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/finchlite/codegen/c.py b/src/finchlite/codegen/c.py index ba1d8c60..f79f6a05 100644 --- a/src/finchlite/codegen/c.py +++ b/src/finchlite/codegen/c.py @@ -204,12 +204,24 @@ def construct_from_c(fmt, c_obj): register_property(t, "construct_from_c", "__attr__", lambda fmt, c_value: c_value) register_property(t, "numba_type", "__attr__", lambda t: t) + +def scalar_to_ctypes_copy(fmt, obj): + """ + This hack is required because it turns out that scalars don't own memory or smth + """ + arr = np.array([obj], dtype=obj.dtype, copy=True) + scalar_ctype = np.ctypeslib.as_ctypes_type(obj.dtype) + ptr_ctype = ctypes.POINTER(scalar_ctype) + return arr.ctypes.data_as(ptr_ctype).contents + + register_property( np.generic, "serialize_to_c", "__attr__", - lambda fmt, obj: np.ctypeslib.as_ctypes(obj), + scalar_to_ctypes_copy, ) + # pass by value -> no op register_property( np.generic, diff --git a/tests/scripts/safebufferaccess.py b/tests/scripts/safebufferaccess.py index af133cce..9e279e03 100755 --- a/tests/scripts/safebufferaccess.py +++ b/tests/scripts/safebufferaccess.py @@ -9,7 +9,6 @@ """ import argparse -import ctypes import numpy as np @@ -25,22 +24,22 @@ subparser = parser.add_subparsers(required=True, dest="subparser_name") load = subparser.add_parser("load", help="attempt to load some element") -load.add_argument("index", type=int, help="the index to load") +load.add_argument("index", type=np.intp, help="the index to load") store = subparser.add_parser("store", help="attempt to store into some element") -store.add_argument("index", type=int, help="the index to load") -store.add_argument("value", type=int, help="the value to store") +store.add_argument("index", type=np.intp, help="the index to load") +store.add_argument("value", 
type=np.int64, help="the value to store") args = parser.parse_args() -a = np.array(range(args.size), dtype=ctypes.c_int64) +a = np.array(range(args.size), dtype=np.int64) ab = NumpyBuffer(a) ab_safe = SafeBuffer(ab) ab_v = asm.Variable("a", ab_safe.ftype) ab_slt = asm.Slot("a_", ab_safe.ftype) -idx = asm.Variable("idx", ctypes.c_size_t) -val = asm.Variable("val", ctypes.c_int64) +idx = asm.Variable("idx", np.intp) +val = asm.Variable("val", np.int64) res_var = asm.Variable("val", ab_safe.ftype.element_type) res_var2 = asm.Variable("val2", ab_safe.ftype.element_type) @@ -64,6 +63,7 @@ res_var2, asm.Load(ab_slt, idx), ), + asm.Repack(ab_slt), asm.Return(res_var), ) ), @@ -79,7 +79,8 @@ idx, val, ), - asm.Return(asm.Literal(ctypes.c_int64(0))), + asm.Repack(ab_slt), + asm.Return(asm.Literal(0)), ) ), ), @@ -91,8 +92,8 @@ match args.subparser_name: case "load": - print(access(ab_safe, ctypes.c_size_t(args.index)).value) + print(access(ab_safe, args.index)) case "store": - change(ab_safe, ctypes.c_size_t(args.index), ctypes.c_int64(args.value)) + change(ab_safe, args.index, args.value) arr = [str(ab_safe.load(i)) for i in range(args.size)] print(f"[{' '.join(arr)}]") diff --git a/tests/test_codegen.py b/tests/test_codegen.py index 202c406e..a06e22a5 100644 --- a/tests/test_codegen.py +++ b/tests/test_codegen.py @@ -260,7 +260,7 @@ def test_malloc_resize(new_size): ) ) mod = CCompiler()(prgm) - assert mod.length(ab).value == new_size + assert mod.length(ab) == new_size assert ab.length() == new_size for i in range(new_size): assert ab.load(i) == 0 if i >= len(a) else a[i] From 8cd031ec1d2a8be29a88418073815dbae100a7dd Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Mon, 1 Dec 2025 20:52:50 -0500 Subject: [PATCH 6/7] introduce immutable and mutable structs Even though literally everything should be immutable right now --- src/finchlite/codegen/c.py | 69 +++++++++++++----------- src/finchlite/finch_assembly/__init__.py | 16 +++--- src/finchlite/finch_assembly/struct.py | 21 
+++++++- 3 files changed, 66 insertions(+), 40 deletions(-) diff --git a/src/finchlite/codegen/c.py b/src/finchlite/codegen/c.py index f79f6a05..1600060c 100644 --- a/src/finchlite/codegen/c.py +++ b/src/finchlite/codegen/c.py @@ -13,9 +13,16 @@ import numpy as np +from finchlite.finch_assembly.struct import MutableStructFType + from .. import finch_assembly as asm from ..algebra import query_property, register_property -from ..finch_assembly import AssemblyStructFType, BufferFType, TupleFType +from ..finch_assembly import ( + AssemblyStructFType, + BufferFType, + ImmutableStructFType, + TupleFType, +) from ..symbolic import Context, Namespace, ScopedDict, fisinstance, ftype from ..util import config from ..util.cache import file_cache @@ -1045,57 +1052,55 @@ def struct_c_type(fmt: AssemblyStructFType): return new_struct -def struct_c_type_wrapper(fmt: AssemblyStructFType): - """ - C type decider for struct types. Serialization actually ensures that before - crossing the FFI boundary, all serialized structs are structs, not - pointers. - - The reason why we have this method is that ctypes can intelligently infer - whether we are working with a pointer arg type (pass by reference) or a - non-pointer type (pass by value) - """ - t = struct_c_type(fmt) - if fmt.is_mutable: - return ctypes.POINTER(t) - return t +""" +Note: When serializing any struct to C, it will get serialized to a struct with +no indirection. 
+When you pass a struct into a kernel that expects a struct pointer, ctypes applies +the indirection automatically (pass by reference). When the kernel expects a +non-pointer struct type, ctypes passes the struct by value without any +indirection. +""" register_property( - AssemblyStructFType, + MutableStructFType, "c_type", "__attr__", - struct_c_type_wrapper, + lambda fmt: ctypes.POINTER(struct_c_type(fmt)), ) +register_property( + ImmutableStructFType, "c_type", "__attr__", lambda fmt: struct_c_type(fmt) +) -def struct_c_getattr(fmt: AssemblyStructFType, ctx, obj, attr): - if fmt.is_mutable: - # we are passing things in as a pointer (reference c_type_wrapper) - return f"{obj}->{attr}" - return f"{obj}.{attr}" +register_property( + MutableStructFType, + "c_getattr", + "__attr__", + lambda fmt, ctx, obj, attr: f"{obj}->{attr}", +) register_property( - AssemblyStructFType, + ImmutableStructFType, "c_getattr", "__attr__", - struct_c_getattr, + lambda fmt, ctx, obj, attr: f"{obj}.{attr}", ) -def struct_c_setattr(fmt: AssemblyStructFType, ctx, obj, attr, val): - if fmt.is_mutable: - ctx.emit(f"{ctx.feed}{obj}->{attr} = {val};") - else: - ctx.emit(f"{ctx.feed}{obj}.{attr} = {val};") +def struct_mutable_setattr(fmt: AssemblyStructFType, ctx, obj, attr, val): + ctx.emit(f"{ctx.feed}{obj}->{attr} = {val};") +# the equivalent for immutable is f"{ctx.feed}{obj}.{attr} = {val};" +# but we deliberately do not register it: immutable structs must not be written to. 
+ register_property( - AssemblyStructFType, + MutableStructFType, "c_setattr", "__attr__", - struct_c_setattr, + struct_mutable_setattr, ) @@ -1141,5 +1146,5 @@ def tuple_construct_from_c(fmt: TupleFType, c_struct): TupleFType, "c_type", "__attr__", - lambda fmt: struct_c_type_wrapper(asm.NamedTupleFType("CTuple", fmt.struct_fields)), + lambda fmt: struct_c_type(asm.NamedTupleFType("CTuple", fmt.struct_fields)), ) diff --git a/src/finchlite/finch_assembly/__init__.py b/src/finchlite/finch_assembly/__init__.py index a2417d22..293510f6 100644 --- a/src/finchlite/finch_assembly/__init__.py +++ b/src/finchlite/finch_assembly/__init__.py @@ -1,9 +1,5 @@ from .buffer import Buffer, BufferFType, element_type, length_type -from .cfg_builder import ( - AssemblyCFGBuilder, - assembly_build_cfg, - assembly_number_uses, -) +from .cfg_builder import AssemblyCFGBuilder, assembly_build_cfg, assembly_number_uses from .dataflow import AssemblyCopyPropagation, assembly_copy_propagation from .interpreter import AssemblyInterpreter, AssemblyInterpreterKernel from .nodes import ( @@ -36,7 +32,13 @@ Variable, WhileLoop, ) -from .struct import AssemblyStructFType, NamedTupleFType, TupleFType +from .struct import ( + AssemblyStructFType, + ImmutableStructFType, + MutableStructFType, + NamedTupleFType, + TupleFType, +) from .type_checker import AssemblyTypeChecker, AssemblyTypeError, assembly_check_types __all__ = [ @@ -61,10 +63,12 @@ "GetAttr", "If", "IfElse", + "ImmutableStructFType", "Length", "Literal", "Load", "Module", + "MutableStructFType", "NamedTupleFType", "Print", "Repack", diff --git a/src/finchlite/finch_assembly/struct.py b/src/finchlite/finch_assembly/struct.py index b855f87e..f2bb138e 100644 --- a/src/finchlite/finch_assembly/struct.py +++ b/src/finchlite/finch_assembly/struct.py @@ -45,7 +45,24 @@ def struct_attrtype(self, attr: str) -> Any: return dict(self.struct_fields)[attr] -class NamedTupleFType(AssemblyStructFType): +class 
ImmutableStructFType(AssemblyStructFType): + @property + def is_mutable(self) -> bool: + return False + + +class MutableStructFType(AssemblyStructFType): + """ + Class for a mutable assembly struct type. + It is currently not used anywhere, but maybe it will be useful in the future? + """ + + @property + def is_mutable(self) -> bool: + return True + + +class NamedTupleFType(ImmutableStructFType): def __init__(self, struct_name, struct_fields): self._struct_name = struct_name self._struct_fields = struct_fields @@ -79,7 +96,7 @@ def __call__(self, *args): return namedtuple(self.struct_name, self.struct_fieldnames)(args) -class TupleFType(AssemblyStructFType): +class TupleFType(ImmutableStructFType): def __init__(self, struct_name, struct_formats): self._struct_name = struct_name self._struct_formats = struct_formats From 69d89e1fc529ea1043e0449d3cf25e3f71f2daf5 Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Tue, 2 Dec 2025 09:58:04 -0500 Subject: [PATCH 7/7] simplify numpy serialization --- src/finchlite/codegen/c.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/finchlite/codegen/c.py b/src/finchlite/codegen/c.py index 1600060c..953536ef 100644 --- a/src/finchlite/codegen/c.py +++ b/src/finchlite/codegen/c.py @@ -13,14 +13,13 @@ import numpy as np -from finchlite.finch_assembly.struct import MutableStructFType - from .. 
import finch_assembly as asm from ..algebra import query_property, register_property from ..finch_assembly import ( AssemblyStructFType, BufferFType, ImmutableStructFType, + MutableStructFType, TupleFType, ) from ..symbolic import Context, Namespace, ScopedDict, fisinstance, ftype @@ -212,21 +211,11 @@ def construct_from_c(fmt, c_obj): register_property(t, "numba_type", "__attr__", lambda t: t) -def scalar_to_ctypes_copy(fmt, obj): - """ - This hack is required because it turns out that scalars don't own memory or smth - """ - arr = np.array([obj], dtype=obj.dtype, copy=True) - scalar_ctype = np.ctypeslib.as_ctypes_type(obj.dtype) - ptr_ctype = ctypes.POINTER(scalar_ctype) - return arr.ctypes.data_as(ptr_ctype).contents - - register_property( np.generic, "serialize_to_c", "__attr__", - scalar_to_ctypes_copy, + lambda fmt, obj: np.ctypeslib.as_ctypes(np.array(obj)), ) # pass by value -> no op