Support the buffer mutation protocol via the FxImporter. (#577)

stellaraccident · Ubuntu · web-flow · commit d1cd92fa1801 · 2024-04-16T10:52:12.000-05:00
Taken along with llvm/torch-mlir#3074 and the hack job here (https://gist.github.com/stellaraccident/83f91c7316ea668d59e0718e179e2cfd), this gives us a path to export hermetic training steps from PyTorch. --------- Co-authored-by: Ubuntu <kyle@kyle-mem.judsoscro3wupi0qm4bjlj5m3b.bx.internal.cloudapp.net>
diff --git a/core/shark_turbine/aot/builtins/__init__.py b/core/shark_turbine/aot/builtins/__init__.py
@@ -36,5 +36,6 @@
     "export_global",
     "export_global_tree",
     "export_parameters",
+    "export_buffers",
     "jittable",
 ]
diff --git a/core/shark_turbine/aot/builtins/globals.py b/core/shark_turbine/aot/builtins/globals.py
@@ -29,6 +29,14 @@
 )
 
 
+# Names exported from this module by `from ... import *`.
+__all__ = [
+    "export_global",
+    "export_global_tree",
+    "export_parameters",
+    "export_buffers",
+]
+
+
 class export_global(GlobalsDef, Abstractifiable):
     """Exports a single global into a CompiledModule."""
 
@@ -164,6 +172,60 @@ def __repr__(self):
         return f"<export_parameters {', '.join(names)}>"
 
 
+class export_buffers(GlobalsDef, TreeAbstractifiable):
+    """Exports buffers from an nn.Module.
+
+    These are exposed to procedural programs as a dictionary of buffer
+    name/values, keyed by the names yielded by `nn_module.named_buffers()`.
+    """
+
+    __slots__ = [
+        "_buffer_list",
+        "_schema",
+        "_tree",
+    ]
+
+    def __init__(
+        self,
+        nn_module: nn.Module,
+        *,
+        mutable: Optional[bool] = None,
+        external: Optional[bool] = None,
+        external_scope: Optional[str] = None,
+        name_mapper: Optional[NameMapCallback] = None,
+        uninitialized: Optional[bool] = None,
+        attrs: Optional[GlobalAttributes] = None,
+    ):
+        # An explicit `attrs` object wins; the individual keyword arguments
+        # are only consulted when no `attrs` is supplied.
+        if attrs is None:
+            attrs = GlobalAttributes(
+                mutable=bool(mutable),
+                external=external,
+                external_scope=external_scope,
+                name_mapper=name_mapper,
+                uninitialized=uninitialized,
+            )
+        super().__init__(attrs)
+        # Snapshot the (name, buffer) pairs once so that iteration, keyed
+        # lookup and the tree schema are all derived from the same data.
+        self._buffer_list = list(nn_module.named_buffers())
+        self._tree = dict(self._buffer_list)
+        _, self._schema = tree_flatten(self._tree)
+
+    def items(self):
+        """Yields `(name, buffer)` pairs in `named_buffers()` order."""
+        for name, value in self._buffer_list:
+            yield (name, value)
+
+    def schema(self) -> TreeSpec:
+        """Returns the tree spec obtained by flattening the buffer dict."""
+        return self._schema
+
+    def abstractify_tree(self):
+        """Maps `abstractify_single_value` over every buffer in the tree."""
+        return tree_map(abstractify_single_value, self._tree)
+
+    def __getitem__(self, key):
+        """Returns the buffer registered under `key`."""
+        return self._tree[key]
+
+    def __repr__(self):
+        # Read `_buffer_list`: this class declares no `_param_list` slot, so
+        # referencing it here would raise AttributeError on every repr().
+        names = [name for name, _ in self._buffer_list]
+        return f"<export_buffers {', '.join(names)}>"
+
+
 def _transform_tree_to_names(prefix: str, tree):
     """Produces a topologically similar tree but where each value is a fully qualified name."""
     join = lambda key: f"{prefix}.{key}" if prefix else key
diff --git a/core/shark_turbine/aot/support/ir_utils.py b/core/shark_turbine/aot/support/ir_utils.py
@@ -25,6 +25,7 @@
 )
 
 from ...support.ir_imports import (
+    AsmState,
     Attribute,
     BF16Type,
     DenseElementsAttr,
@@ -197,7 +198,7 @@ def handle_mlir_error(self, op: Operation, e: MLIRError, message: str):
         try:
             with open(dump_path, "wb") as f:
                 op.print(
-                    f,
+                    file=f,
                     binary=True,
                     print_generic_op_form=True,
                     large_elements_limit=100,
diff --git a/core/shark_turbine/aot/support/procedural/exported_program.py b/core/shark_turbine/aot/support/procedural/exported_program.py
@@ -27,6 +27,7 @@
     FxImporter,
     FxImporterHooks,
     GraphNodeImporter,
+    InputInfo,
 )
 
 from ....support.logging import aot_logger as logger
@@ -219,6 +220,27 @@ class _Hooks(FxImporterHooks):
     def __init__(self, module_builder: ModuleBuilder):
         self.module_builder = module_builder
 
+    def store_produced_value(
+        self,
+        gni: GraphNodeImporter,
+        py_value: Any,
+        produced_ir_value: Any,
+        info: InputInfo,
+    ):
+        """Stores an IR value produced by the importer into a tracked global.
+
+        `py_value` is looked up in the module builder's global ref tracker.
+        `produced_ir_value` is then cast to the global's IR type with a
+        `torch_c.to_builtin_tensor` op and written with `util_d.GlobalStoreOp`.
+
+        Raises:
+            ValueError: if `py_value` has no mapping in the tracker.
+        """
+        tracked = self.module_builder.global_ref_tracker.track(py_value)
+        if tracked.is_empty:
+            # Only values already mapped to a global can be stored to.
+            raise ValueError(f"Cannot store value to unmapped global for: {info}")
+        logger.debug("Resolved  global for store %r", tracked)
+        target_global: MaterializedGlobal = tracked.value  # type: ignore
+        # Cast to the global's IR type before emitting the store.
+        cast_op = Operation.create(
+            "torch_c.to_builtin_tensor",
+            results=[target_global.ir_type],
+            operands=[produced_ir_value],
+        )
+        util_d.GlobalStoreOp(cast_op.result, target_global.symbol_name)
+
     def resolve_literal(self, gni: GraphNodeImporter, literal: Any) -> Optional[Value]:
         # We support resolution of tracked reference types. Currently this
         # only includes Tensors. All others we let the importer do what it
diff --git a/core/shark_turbine/support/ir_imports.py b/core/shark_turbine/support/ir_imports.py
@@ -8,6 +8,7 @@
 """Unifies all imports of iree.compiler.ir into one place."""
 
 from iree.compiler.ir import (
+    AsmState,
     Attribute,
     Block,
     BlockArgument,
diff --git a/core/tests/aot/compiled_exported_program_test.py b/core/tests/aot/compiled_exported_program_test.py
@@ -119,6 +119,23 @@ class ParamsAsGlobalsModule(CompiledModule):
             2, module_str.count("util.global.load @_params.classifier.bias")
         )
 
+    def testBuffersAsGlobals(self):
+        """Checks that module buffers exported as mutable globals are loaded and stored."""
+        fxb = FxProgramsBuilder(SimpleBuffers())
+
+        @fxb.export_program(args=(torch.empty([128]),))
+        def _compute1(module, x):
+            return module.forward(x)
+
+        class BuffersAsGlobalsModule(CompiledModule):
+            # Exported as mutable: the assertions below expect a mutable
+            # global named after this `buffers` attribute.
+            buffers = export_buffers(fxb.root_module, mutable=True)
+            compute1 = _compute1
+
+        inst = BuffersAsGlobalsModule(context=Context(), import_to="import")
+        module_str = str(CompiledModule.get_mlir_module(inst))
+        # SimpleBuffers.forward reads `buf` and then overwrites it in place,
+        # so the IR should declare the global, load it, and store to it.
+        self.assertIn("util.global private mutable @_buffers.buf", module_str)
+        self.assertIn("%_buffers.buf = util.global.load @_buffers.buf", module_str)
+        self.assertIn("util.global.store", module_str)
+
 
 class SimpleParams(nn.Module):
     def __init__(self):
@@ -129,6 +146,18 @@ def forward(self, x):
         return self.classifier(x)
 
 
+class SimpleBuffers(nn.Module):
+    """Module with one registered buffer that `forward` reads and then overwrites."""
+
+    def __init__(self):
+        super().__init__()
+        initial = torch.randn(1)
+        self.register_buffer("buf", initial)
+
+    def forward(self, x: torch.Tensor):
+        """Returns `x * buf`, then copies `x.sum()` into `buf` in place."""
+        total = x.sum()
+        scaled = x * self.buf
+        # The product is computed before the buffer is overwritten.
+        self.buf.copy_(total)
+        return scaled
+
+
 if __name__ == "__main__":
     logging.basicConfig(level=logging.DEBUG)
     unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -36,5 +36,6 @@`
`36`	`36`	`"export_global",`
`37`	`37`	`"export_global_tree",`
`38`	`38`	`"export_parameters",`
	`39`	`+ "export_buffers",`
`39`	`40`	`"jittable",`
`40`	`41`	`]`