
Commit 299ff3a

[llm] Add IRPA transcoding. (#633)
The primary feature is the addition of `save` and `load` on the `Dataset` class. Previously, it was only possible to create one of these from a GGUF file. Now we can load from GGUF, save to IRPA, perform arbitrary transformations and updates, etc.

The rest is the machinery to manage this:

* Layout types can now be registered and have stable serialization.
* InferenceTensor types can now be registered and have stable serialization.
* A generic PlanarQuantizedTensor can represent the unpacked form of any QuantizedTensor when serializing to IRPA. By default, QuantizedTensors are saved planarized.
* The core ParameterArchive classes were completed to faithfully round-trip PyTorch tensors.
* There are still things in the CLI that refer to "gguf", but these now operate on any supported archive format (GGUF or IRPA).

Future enhancements include supporting incremental IRPA construction with a last-wins/merged approach. This would let arbitrary Theta transformations be done and appended as new segments of the file, which would probably be a really useful quantizer feature.

While not done here, this facility makes it pretty trivial to write quantizers and similar tools, since it moves them away from the bespoke, file-format-specific handling they are usually built on. Instead, they become high-level transformations on the Theta collection.
1 parent b9c9201 commit 299ff3a

24 files changed (+1222, -51 lines)
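For orientation, the workflow this enables looks roughly like the following. A minimal sketch, assuming a hypothetical import path for `Dataset`; the commit only establishes that `Dataset` gains `load` and `save`:

```python
# Sketch of GGUF -> IRPA transcoding. The import path below is an
# assumption for illustration; exact signatures may differ.
from turbine_llm.types import Dataset  # hypothetical module path

dataset = Dataset.load("model.gguf")  # previously the only construction path
# ... arbitrary Theta transformations can happen here ...
dataset.save("model.irpa")            # new: write back out as IRPA
```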

core/shark_turbine/aot/exporter.py

Lines changed: 4 additions & 0 deletions
@@ -68,6 +68,10 @@ def mlir_module(self) -> Operation:
         """Gets the MLIR module resulting from the last compilation phase."""
         return CompiledModule.get_mlir_module(self.compiled_module)
 
+    def verify(self):
+        """Runs the verifier on the module, raising an exception on failure."""
+        self.mlir_module.verify()
+
     def print_readable(self, large_elements_limit: int = 50):
         """Prints a human readable version of the current compilation IR."""
         self.mlir_module.print(large_elements_limit=large_elements_limit)
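The new `verify()` surfaces IR problems at export time rather than later in compilation. A minimal sketch, assuming the `export()` entry point re-exported from `shark_turbine.aot` (as imported in the tests below) accepts a module plus example args:

```python
# Hedged sketch: run the verifier on a freshly exported module.
import torch
import torch.nn as nn
from shark_turbine.aot import export

class Doubler(nn.Module):
    def forward(self, x):
        return x * 2.0

exported = export(Doubler(), torch.empty(4))  # signature assumed
exported.verify()  # raises on malformed IR instead of failing downstream
```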

core/shark_turbine/aot/params.py

Lines changed: 152 additions & 7 deletions
@@ -6,9 +6,11 @@
 
 from typing import Iterator, List, Optional, Set, Tuple, Union
 
-from dataclasses import dataclass
+import json
 from pathlib import Path
+import warnings
 
+import numpy as np
 import torch
 import torch.nn as nn
 
@@ -26,6 +28,7 @@
     "externalize_module_parameters",
     "save_module_parameters",
     "ParameterArchive",
+    "ParameterArchiveEntry",
     "ParameterArchiveBuilder",
 ]
 
@@ -46,6 +49,64 @@ def externalize_module_parameters(
         trait.set(tensor)
 
 
+################################################################################
+# Metadata
+################################################################################
+
+_dtype_to_name: dict[torch.dtype, str] = {
+    torch.float32: "float32",
+    torch.float64: "float64",
+    torch.complex64: "complex64",
+    torch.complex128: "complex128",
+    torch.float16: "float16",
+    torch.bfloat16: "bfloat16",
+    torch.float8_e4m3fn: "float8_e4m3fn",
+    torch.float8_e4m3fnuz: "float8_e4m3fnuz",
+    torch.float8_e5m2: "float8_e5m2",
+    torch.float8_e5m2fnuz: "float8_e5m2fnuz",
+    torch.int8: "int8",
+    torch.int16: "int16",
+    torch.int32: "int32",
+    torch.int64: "int64",
+    torch.uint16: "uint16",
+    torch.uint32: "uint32",
+    torch.uint64: "uint64",
+    torch.uint8: "uint8",
+    torch.bool: "bool",
+}
+
+_name_to_dtype: dict[str, torch.dtype] = {v: k for k, v in _dtype_to_name.items()}
+
+_metadata_prefix = "PYTORCH:"
+
+
+def _make_tensor_metadata(t: torch.Tensor) -> str:
+    """Makes a tensor metadata blob that can be used to reconstruct the tensor."""
+    dtype = t.dtype
+    try:
+        dtype_name = _dtype_to_name[dtype]
+    except KeyError:
+        dtype_name = "unknown"
+        warnings.warn(
+            f"Unknown dtype saving params: {dtype} (missing entry in params._dtype_to_name)"
+        )
+    dtype_desc = {
+        "class_name": type(dtype).__name__,
+        "is_complex": dtype.is_complex,
+        "is_floating_point": dtype.is_floating_point,
+        "is_signed": dtype.is_signed,
+        "itemsize": dtype.itemsize,
+    }
+    d = {
+        "type": "Tensor",
+        "dtype": dtype_name,
+        "shape": list(t.shape),
+        "dtype_desc": dtype_desc,
+    }
+    encoded = f"{_metadata_prefix}{json.dumps(d)}"
+    return encoded
+
+
 ################################################################################
 # Parameter archives save/load
 ################################################################################
@@ -63,6 +124,73 @@ def save_module_parameters(
     builder.save(file_path)
 
 
+class ParameterArchiveEntry:
+    """Wraps a raw ParameterIndexEntry with additional helpers."""
+
+    def __init__(self, raw: ParameterIndexEntry):
+        self.raw = raw
+
+    @property
+    def key(self) -> str:
+        return self.raw.key
+
+    def as_flat_tensor(self) -> torch.Tensor:
+        """Accesses the contents as a uint8 flat tensor.
+
+        If it is a splat, then the tensor will be a view of the splat pattern.
+
+        Raises a ValueError on unsupported entries.
+        """
+        if self.raw.is_file:
+            wrapper = np.array(self.raw.file_view, copy=False)
+        elif self.raw.is_splat:
+            wrapper = np.array(self.raw.splat_pattern, copy=True)
+        else:
+            raise ValueError(f"Unsupported ParameterIndexEntry: {self.raw}")
+
+        return torch.from_numpy(wrapper)
+
+    def as_tensor(self) -> torch.Tensor:
+        """Returns a tensor viewed with appropriate shape/dtype from metadata.
+
+        Raises a ValueError if unsupported.
+        """
+        # Decode metadata.
+        metadata = self.raw.metadata.decode()
+        if not metadata.startswith(_metadata_prefix):
+            raise ValueError(
+                f"No metadata for parameter entry {self.key}: Cannot convert to tensor"
+            )
+        metadata = metadata[len(_metadata_prefix) :]
+        d = json.loads(metadata)
+        try:
+            type_name = d["type"]
+            if d["type"] != "Tensor":
+                raise ValueError(
+                    f"Metadata for parameter entry {self.key} is not a Tensor ('{type_name}')"
+                )
+            dtype_name = d["dtype"]
+            shape = d["shape"]
+        except KeyError as e:
+            raise ValueError(f"Bad metadata for parameter entry {self.key}") from e
+
+        # Unpack/validate.
+        try:
+            dtype = _name_to_dtype[dtype_name]
+        except KeyError:
+            raise ValueError(f"Unknown dtype name '{dtype_name}'")
+        try:
+            shape = [int(d) for d in shape]
+        except ValueError as e:
+            raise ValueError(f"Illegal shape for parameter entry {self.key}") from e
+
+        t = self.as_flat_tensor()
+        return t.view(dtype=dtype).view(shape)
+
+    def __repr__(self):
+        return f"ParameterArchiveEntry({self.raw}, metadata={self.raw.metadata})"
+
+
 class ParameterArchive:
     """Allows access to a parameter archive as CPU tensors.
 
@@ -71,11 +199,16 @@ class ParameterArchive:
     """
 
     def __init__(
-        self, file_path: Optional[Union[str, Path]] = None, *, mmap: bool = True
+        self,
+        file_path: Optional[Union[str, Path]] = None,
+        *,
+        mmap: bool = True,
+        readable: bool = True,
+        writable: bool = False,
     ):
         self._index = ParameterIndex()
         if file_path is not None:
-            self.load(file_path, mmap=mmap)
+            self.load(file_path, mmap=mmap, readable=readable, writable=writable)
 
     def load(
         self,
@@ -94,8 +227,12 @@ def load(
     def index(self) -> ParameterIndex:
         return self._index
 
-    def items(self) -> List[Tuple[str, ParameterIndexEntry]]:
-        return self._index.items()
+    def items(self) -> List[Tuple[str, ParameterArchiveEntry]]:
+        """Returns the items in the archive.
+
+        Note that there can be duplicates if the archive was constructed that way.
+        """
+        return [(k, ParameterArchiveEntry(v)) for k, v in self._index.items()]
 
     def __repr__(self):
         return repr(self._index)
113250

114251
def add_tensor(self, name: str, tensor: torch.Tensor):
115252
"""Adds an named tensor to the archive."""
116-
host_array = tensor.detach().cpu().contiguous().numpy()
117-
self._index.add_buffer(name, host_array)
253+
flat_array = tensor.detach().flatten().contiguous().cpu().view(torch.uint8)
254+
host_array = flat_array.numpy()
255+
self._index.add_buffer(name, host_array, metadata=_make_tensor_metadata(tensor))
118256

119257
def add_module(self, module: nn.Module, *, prefix: str = ""):
120258
"""Adds all parameters and persistent buffers from a module hierarchy."""
121259
for name, t in _yield_saveable_tensors(module, prefix=prefix):
122260
self.add_tensor(name, t)
123261

262+
def add_blob(self, key: str, blob):
263+
"""Adds a raw blob to the index.
264+
265+
The blob must be interpretable as a buffer.
266+
"""
267+
self._index.add_buffer(key, blob)
268+
124269

125270
def _yield_saveable_tensors(
126271
module: nn.Module, *, prefix: str = ""
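Taken together, these changes make a metadata-preserving round trip possible. A minimal sketch using only APIs visible in this diff, assuming `ParameterArchive` and `ParameterArchiveBuilder` are re-exported from `shark_turbine.aot` as the `__all__` entries above suggest:

```python
import torch
from shark_turbine.aot import ParameterArchive, ParameterArchiveBuilder

builder = ParameterArchiveBuilder()
t = torch.randn(4, 8, dtype=torch.bfloat16)
builder.add_tensor("w", t)        # stores flat uint8 bytes plus "PYTORCH:" metadata
builder.save("/tmp/params.irpa")

archive = ParameterArchive("/tmp/params.irpa", mmap=False)
items = dict(archive.items())
restored = items["w"].as_tensor()  # shape/dtype recovered from the metadata blob
torch.testing.assert_close(restored, t)
```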

core/shark_turbine/dynamo/type_conversion.py

Lines changed: 20 additions & 6 deletions
@@ -9,23 +9,25 @@
 Note that there are ad-hoc type conversions spread around a bit, and we
 should consolidate them here.
 """
-from typing import List
+from typing import List, Optional
 
 import functools
 import re
 
-from iree.compiler.ir import (
+from ..support.ir_imports import (
+    tensor_d,
     Context,
     F64Type,
     IntegerType,
     RankedTensorType,
     ShapedType,
-    Type as IrType,
+    IrType,
     Location,
     Operation,
     Value,
 )
 
+
 # Match an overall torch type declaration. Groups:
 # 1. Local name (int, float, vtensor)
 # 2. Parameter block ("<...>"), including the delimitters
@@ -103,11 +105,15 @@ def convert_torch_element_type_to_native(
         return torch_type
 
     def materialize_native_to_torch(
-        self, native_value: Value, torch_type: IrType
+        self, native_value: Value, torch_type: IrType, *, static_info_cast: bool = False
     ) -> Value:
         native_type = native_value.type
         if RankedTensorType.isinstance(native_type):
             # Convert to vtensor.
+            if static_info_cast:
+                required_native_type = self.torch_type_to_native(torch_type)
+                if required_native_type != native_type:
+                    native_value = tensor_d.cast(required_native_type, native_value)
             return Operation.create(
                 "torch_c.from_builtin_tensor",
                 results=[torch_type],
@@ -138,15 +144,23 @@ def materialize_native_to_torch(
                 f"Unsupported native->torch ABI type conversion: {native_type} -> {torch_type}"
             )
 
-    def materialize_torch_to_native(self, torch_value: Value) -> Value:
+    def materialize_torch_to_native(
+        self, torch_value: Value, *, static_info_cast_to: Optional[IrType] = None
+    ) -> Value:
         native_type = self.torch_type_to_native(torch_value.type)
         if RankedTensorType.isinstance(native_type):
             # Convert to vtensor.
-            return Operation.create(
+            builtin_tensor_value = Operation.create(
                 "torch_c.to_builtin_tensor",
                 results=[native_type],
                 operands=[torch_value],
             ).result
+            # Detect type difference and assume a static cast is needed.
+            if static_info_cast_to is not None and static_info_cast_to != native_type:
+                builtin_tensor_value = tensor_d.cast(
+                    static_info_cast_to, builtin_tensor_value
+                )
+            return builtin_tensor_value
         elif IntegerType.isinstance(native_type):
             # Convert to !torch.int
             int_type = IntegerType(native_type)
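Both new keyword arguments implement the same rule: after bridging between `!torch.vtensor` and builtin `tensor`, compare the statically required type against the value's current type and insert a `tensor.cast` only when they differ. A toy illustration of that decision, with plain strings standing in for IR types (not the real MLIR API):

```python
def maybe_static_cast(current_type: str, required_type: str) -> list[str]:
    """Toy model of the insertion logic above; strings stand in for IR types."""
    ops = []
    if required_type != current_type:
        # Mirrors tensor_d.cast(required_native_type, native_value) in the diff.
        ops.append(f"tensor.cast : {current_type} -> {required_type}")
    return ops

# A dynamically shaped value flowing into a statically typed use:
print(maybe_static_cast("tensor<?x8xf32>", "tensor<97x8xf32>"))
# -> ['tensor.cast : tensor<?x8xf32> -> tensor<97x8xf32>']
```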

core/shark_turbine/transforms/general/custom_op_expansion.py

Lines changed: 8 additions & 2 deletions
@@ -32,6 +32,7 @@
 
 from ...support.ir_imports import (
     Block,
+    IrType,
     InsertionPoint,
     OpResult,
     Operation,
@@ -262,7 +263,10 @@ def __init__(
         if not desc.is_list:
             if arity == 1:
                 arg_bindings.append(
-                    type_converter.materialize_torch_to_native(operand)
+                    type_converter.materialize_torch_to_native(
+                        operand,
+                        static_info_cast_to=IrType.parse(desc.mlir_type_asm),
+                    )
                 )
             else:
                 arg_bindings.append(None)
@@ -297,7 +301,9 @@ def yield_results(self, *results: Value):
         for new_result, old_result in zip(results, torch_op_results):
             torch_type = old_result.type
             new_result = self.type_converter.materialize_native_to_torch(
-                new_result, torch_type
+                new_result,
+                torch_type,
+                static_info_cast=True,
             )
             old_result.replace_all_uses_with(new_result)
         self.yielded = True
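Here `static_info_cast_to` is derived by parsing the custom op definition's declared type string. For reference, type parsing in the upstream MLIR Python bindings (which `ir_imports` re-exports) looks like this; a standalone sketch, assuming `iree.compiler` is installed:

```python
from iree.compiler.ir import Context, Type

with Context():
    declared = Type.parse("tensor<97x8xf32>")  # e.g. from desc.mlir_type_asm
    actual = Type.parse("tensor<?x8xf32>")
    # A mismatch like this is what triggers the tensor.cast insertion above.
    print(declared != actual)  # True
```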

core/tests/aot/params_test.py

Lines changed: 8 additions & 9 deletions
@@ -12,11 +12,6 @@
 import torch
 import torch.nn as nn
 
-from iree.runtime import (
-    ParameterIndex,
-    ParameterProvider,
-)
-
 from shark_turbine.aot import (
     export,
     externalize_module_parameters,
@@ -50,8 +45,10 @@ def testCreateArchive(self):
             # lock the file for an arbitrary duration.
             archive = ParameterArchive(file_path, mmap=False)
             items = dict(archive.items())
-            self.assertIn("classifier.weight", items)
-            self.assertIn("classifier.bias", items)
+            weight = items["classifier.weight"].as_tensor()
+            bias = items["classifier.bias"].as_tensor()
+            torch.testing.assert_close(weight, m.classifier.weight)
+            torch.testing.assert_close(bias, m.classifier.bias)
         finally:
             file_path.unlink()
 
@@ -65,8 +62,10 @@ def testCreateArchiveWithPrefixScope(self):
             # lock the file for an arbitrary duration.
             archive = ParameterArchive(file_path, mmap=False)
             items = dict(archive.items())
-            self.assertIn("foobar.model.classifier.weight", items)
-            self.assertIn("foobar.model.classifier.bias", items)
+            weight = items["foobar.model.classifier.weight"].as_tensor()
+            bias = items["foobar.model.classifier.bias"].as_tensor()
+            torch.testing.assert_close(weight, m.classifier.weight)
+            torch.testing.assert_close(bias, m.classifier.bias)
         finally:
             file_path.unlink()
 
core/tests/transforms/general/custom_op_expansion_test.py

Lines changed: 4 additions & 2 deletions
@@ -34,16 +34,18 @@ def setUpClass(cls):
     def testTensorArgReturn(self):
         m = self.run_test_case("custom_op_simple.mlir")
         m_asm = str(m)
+        print(m_asm)
         self.assertNotIn("torch.operator", m_asm)
         self.assertIn(
             "%0 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[97,8],f32> -> tensor<97x8xf32>",
             m_asm,
         )
+        # TODO: Upgrade to a FileCheck style test so we can pattern match that
+        # the casts are inserted properly.
         self.assertIn(
-            "%1 = torch_c.from_builtin_tensor %0 : tensor<97x8xf32> -> !torch.vtensor<[97,8],f32>",
+            "%1 = torch_c.from_builtin_tensor %cast_0 : tensor<97x8xf32> -> !torch.vtensor<[97,8],f32>",
            m_asm,
         )
-        print(m_asm)
 
     def testStringAttrArg(self):
         global _TEST_STRING_ATTR
