Add MLX metadata and Audio type for transcriptions

olokobayusuf · olokobayusuf · commit 71c1e26a7387 · 2026-03-03T09:10:49.000-08:00
diff --git a/Changelog.md b/Changelog.md
@@ -1,5 +1,8 @@
 ## 0.0.92
-+ Added `muna.beta.Audio` type for creating transcriptions on raw audio buffers.
++ Added `beta.MLXInferenceMetadata` to compile PyTorch models for inference with MLX on Apple Silicon.
++ Added `beta.MLXInferenceSessionMetadata` to compile ONNX Runtime `InferenceSession` instances for inference with MLX on Apple Silicon.
++ Added `beta.Audio` type for creating transcriptions on raw PCM audio buffers.
++ Removed the `output_keys` field (model output dictionary keys) from all `beta` PyTorch inference metadata types, including `beta.OnnxRuntimeInferenceMetadata`.
 
 ## 0.0.91
 + Fixed sporadic memory corruption when creating predictions with image inputs on Windows.
diff --git a/muna/beta/metadata/__init__.py b/muna/beta/metadata/__init__.py
@@ -9,6 +9,7 @@
 from .iree import IREEInferenceBackend, IREEInferenceMetadata
 from .litert import LiteRTInferenceMetadata
 from .llama import LlamaCppBackend, LlamaCppInferenceMetadata
+from .mlx import MLXInferenceMetadata, MLXInferenceSessionMetadata
 from .onnx import (
     OnnxRuntimeInferenceMetadata, OnnxRuntimeExecutionProvider,
     OnnxRuntimeOptimizationLevel
diff --git a/muna/beta/metadata/_torch.py b/muna/beta/metadata/_torch.py
@@ -60,11 +60,6 @@ class PyTorchInferenceMetadataBase(BaseModel, **ConfigDict(arbitrary_types_allow
         description="Model input tensor shapes. Use this to specify dynamic axes.",
         exclude=True
     )
-    output_keys: list[str] | None = Field(
-        default=None,
-        description="Model output dictionary keys. Use this if the model returns a dictionary.",
-        exclude=True
-    )
     optimum_config: Annotated[object | None, BeforeValidator(_validate_optimum_exporter_config)] = Field(
         default=None,
         description="Optimum exporter configuration. Required when `exporter` is `optimum`.",
diff --git a/muna/beta/metadata/coreml.py b/muna/beta/metadata/coreml.py
@@ -14,8 +14,7 @@ class CoreMLInferenceMetadata(PyTorchInferenceMetadataBase):
 
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
+        model_args (tuple): Positional inputs to the model.
         input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
     """
     kind: Literal["meta.inference.coreml"] = Field(default="meta.inference.coreml", init=False)
diff --git a/muna/beta/metadata/executorch.py b/muna/beta/metadata/executorch.py
@@ -6,7 +6,7 @@
 from pydantic import Field
 from typing import Literal
 
-from ._torch import PyTorchInferenceMetadataBase, TorchExporter
+from ._torch import PyTorchInferenceMetadataBase
 
 ExecuTorchInferenceBackend = Literal["xnnpack", "vulkan"]
 
@@ -16,13 +16,12 @@ class ExecuTorchInferenceMetadata(PyTorchInferenceMetadataBase):
 
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
+        model_args (tuple): Positional inputs to the model.
         input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
         backend (ExecuTorchInferenceBackend): ExecuTorch backend to execute the model.
     """
     kind: Literal["meta.inference.executorch"] = Field(default="meta.inference.executorch", init=False)
-    exporter: TorchExporter | None = Field(default=None, init=False)
+    exporter: None = Field(default=None, init=False, exclude=True)
     backend: ExecuTorchInferenceBackend = Field(
         default="xnnpack",
         description="ExecuTorch backend to execute the model.",
diff --git a/muna/beta/metadata/iree.py b/muna/beta/metadata/iree.py
@@ -17,9 +17,8 @@ class IREEInferenceMetadata(PyTorchInferenceMetadataBase):
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
         exporter (TorchExporter): PyTorch exporter to use.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
+        model_args (tuple): Positional inputs to the model.
         input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
     """
     kind: Literal["meta.inference.iree"] = Field(default="meta.inference.iree", init=False)
     backend: IREEInferenceBackend = Field(
diff --git a/muna/beta/metadata/litert.py b/muna/beta/metadata/litert.py
@@ -6,17 +6,16 @@
 from pydantic import Field
 from typing import Literal
 
-from ._torch import PyTorchInferenceMetadataBase, TorchExporter
+from ._torch import PyTorchInferenceMetadataBase
 
 class LiteRTInferenceMetadata(PyTorchInferenceMetadataBase):
     """
     Metadata to compile a PyTorch model for inference with LiteRT.
 
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
+        model_args (tuple): Positional inputs to the model.
         input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
     """
     kind: Literal["meta.inference.litert"] = Field(default="meta.inference.litert", init=False)
-    exporter: TorchExporter | None = Field(default=None, init=False)
+    exporter: None = Field(default=None, init=False, exclude=True)
diff --git a/muna/beta/metadata/mlx.py b/muna/beta/metadata/mlx.py
@@ -0,0 +1,35 @@
+# 
+#   Muna
+#   Copyright © 2026 NatML Inc. All Rights Reserved.
+#
+
+from pydantic import Field
+from typing import Literal
+
+from ._torch import PyTorchInferenceMetadataBase
+from .onnxruntime import OnnxRuntimeInferenceSessionMetadata
+
+class MLXInferenceMetadata(PyTorchInferenceMetadataBase):
+    """
+    Metadata to compile a PyTorch model for inference with MLX on Apple Silicon.
+
+    Members:
+        model (torch.nn.Module): PyTorch module to apply metadata to.
+        exporter (TorchExporter): PyTorch exporter to use.
+        model_args (tuple): Positional inputs to the model.
+        input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
+        optimum_config (optimum.ExporterConfig): Optimum exporter configuration. Required when `exporter` is `optimum`.
+    """
+    kind: Literal["meta.inference.mlx"] = Field(default="meta.inference.mlx", init=False)
+
+class MLXInferenceSessionMetadata(OnnxRuntimeInferenceSessionMetadata):
+    """
+    Metadata to compile an OnnxRuntime `InferenceSession` for inference with MLX on Apple Silicon.
+
+    Members:
+        session (onnxruntime.InferenceSession): OnnxRuntime inference session to apply metadata to.
+        model_path (str | Path): ONNX model path. The file must exist in the compiler sandbox.
+        external_data_path (str | Path): ONNX model external data path. The file must exist in the compiler sandbox.
+    """
+    kind: Literal["meta.inference.mlx_onnx"] = Field(default="meta.inference.mlx_onnx", init=False)
+    providers: None = Field(default=None, init=False, exclude=True)
diff --git a/muna/beta/metadata/onnx.py b/muna/beta/metadata/onnx.py
@@ -18,9 +18,8 @@ class OnnxRuntimeInferenceMetadata(PyTorchInferenceMetadataBase):
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
         exporter (TorchExporter): PyTorch exporter to use.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
+        model_args (tuple): Positional inputs to the model.
         input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
         optimum_config (optimum.ExporterConfig): Optimum exporter configuration. Required when `exporter` is `optimum`.
         optimization (OnnxRuntimeOptimizationLevel): ONNX model optimization level.
         providers (list): Execution providers that can be used to accelerate inference for this model.
diff --git a/muna/beta/metadata/onnxruntime.py b/muna/beta/metadata/onnxruntime.py
@@ -24,7 +24,9 @@ class OnnxRuntimeInferenceSessionMetadata(BaseModel, **ConfigDict(arbitrary_type
 
     Members:
         session (onnxruntime.InferenceSession): OnnxRuntime inference session to apply metadata to.
-        model_path (str | Path): ONNX model path. The model must exist at this path in the compiler sandbox.
+        model_path (str | Path): ONNX model path. The file must exist in the compiler sandbox.
+        external_data_path (str | Path): ONNX model external data path. The file must exist in the compiler sandbox.
+        providers (list): Execution providers that can be used to accelerate inference for this model.
     """
     kind: Literal["meta.inference.onnxruntime"] = Field(default="meta.inference.onnxruntime", init=False)
     session: Annotated[object, BeforeValidator(_validate_ort_inference_session)] = Field(
diff --git a/muna/beta/metadata/openvino.py b/muna/beta/metadata/openvino.py
@@ -15,8 +15,8 @@ class OpenVINOInferenceMetadata(PyTorchInferenceMetadataBase):
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
         exporter (TorchExporter): PyTorch exporter to use.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
+        model_args (tuple): Positional inputs to the model.
         input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
+        optimum_config (optimum.ExporterConfig): Optimum exporter configuration. Required when `exporter` is `optimum`.
     """
     kind: Literal["meta.inference.openvino"] = Field(default="meta.inference.openvino", init=False)
diff --git a/muna/beta/metadata/qnn.py b/muna/beta/metadata/qnn.py
@@ -18,9 +18,8 @@ class QnnInferenceMetadata(PyTorchInferenceMetadataBase):
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
         exporter (TorchExporter): PyTorch exporter to use.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
+        model_args (tuple): Positional inputs to the model.
         input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
         optimum_config (optimum.ExporterConfig): Optimum exporter configuration. Required when `exporter` is `optimum`.
         backend (QnnInferenceBackend): QNN inference backend. Defaults to `cpu`.
         quantization (QnnInferenceQuantization): QNN model quantization mode. This MUST only be specified when backend is `htp`.
diff --git a/muna/beta/metadata/tensorrt.py b/muna/beta/metadata/tensorrt.py
@@ -25,10 +25,9 @@ class TensorRTInferenceMetadata(PyTorchInferenceMetadataBase):
 
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
-        input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
         exporter (TorchExporter): PyTorch exporter to use.
+        model_args (tuple): Positional inputs to the model.
+        input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
         cuda_arch (CudaArchitecture): Target CUDA architecture for the TensorRT engine. Defaults to `sm_80` (Ampere).
         precision (TensorRTPrecision): TensorRT engine inference precision. Defaults to `fp16`.
         hardware_compatibility (TensorRTHardwareCompatibility): TensorRT engine hardware compatibility. Defaults to `none`.
diff --git a/muna/beta/metadata/tensorrt_rtx.py b/muna/beta/metadata/tensorrt_rtx.py
@@ -15,10 +15,10 @@ class TensorRTRTXInferenceMetadata(PyTorchInferenceMetadataBase):
 
     Members:
         model (torch.nn.Module): PyTorch module to apply metadata to.
-        model_args (tuple[Tensor,...]): Positional inputs to the model.
-        input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
-        output_keys (list): Model output dictionary keys. Use this if the model returns a dictionary.
         exporter (TorchExporter): PyTorch exporter to use.
+        model_args (tuple): Positional inputs to the model.
+        input_shapes (list): Model input tensor shapes. Use this to specify dynamic axes.
+        optimum_config (optimum.ExporterConfig): Optimum exporter configuration. Required when `exporter` is `optimum`.
         precision (TensorRTPrecision): TensorRT engine inference precision. Defaults to `fp16`.
     """
     kind: Literal["meta.inference.tensorrt_rtx"] = Field(default="meta.inference.tensorrt_rtx", init=False)
diff --git a/muna/beta/types.py b/muna/beta/types.py
@@ -10,7 +10,12 @@
 class Audio(BaseModel, **ConfigDict(arbitrary_types_allowed=True, frozen=True)):
     """
     Audio buffer.
+
+    Members:
+        samples (ndarray): Linear PCM audio samples with shape (F,C).
+        sample_rate (int): Audio sample rate (Hz).
+        channel_count (int): Audio channel count.
     """
-    samples: NDArray[float32] = Field(description="Audio samples with shape (F,C).")
+    samples: NDArray[float32] = Field(description="Linear PCM audio samples with shape (F,C).")
     sample_rate: int = Field(description="Audio sample rate (Hz).")
     channel_count: int = Field(description="Audio channel count.")
diff --git a/muna/compile.py b/muna/compile.py
@@ -12,10 +12,12 @@
 from typing import Callable, Literal, ParamSpec, TypeVar, cast
 
 from .beta import (
-    CoreMLInferenceMetadata, ExecuTorchInferenceMetadata, LiteRTInferenceMetadata,
-    LlamaCppInferenceMetadata, IREEInferenceMetadata, OnnxRuntimeInferenceMetadata,
-    OnnxRuntimeInferenceSessionMetadata, OpenVINOInferenceMetadata, QnnInferenceMetadata,
-    TensorRTInferenceMetadata, TensorRTRTXInferenceMetadata, TFLiteInterpreterMetadata
+    CoreMLInferenceMetadata, ExecuTorchInferenceMetadata, IREEInferenceMetadata,
+    LiteRTInferenceMetadata, LlamaCppInferenceMetadata, MLXInferenceMetadata,
+    MLXInferenceSessionMetadata, OnnxRuntimeInferenceMetadata,
+    OnnxRuntimeInferenceSessionMetadata, OpenVINOInferenceMetadata,
+    QnnInferenceMetadata, TensorRTInferenceMetadata,
+    TensorRTRTXInferenceMetadata, TFLiteInterpreterMetadata
 )
 from .sandbox import Sandbox
 from .types import PredictorAccess
@@ -36,6 +38,8 @@
     IREEInferenceMetadata               |
     LiteRTInferenceMetadata             |
     LlamaCppInferenceMetadata           |
+    MLXInferenceMetadata                |
+    MLXInferenceSessionMetadata         |
     OnnxRuntimeInferenceMetadata        |
     OnnxRuntimeInferenceSessionMetadata |
     OpenVINOInferenceMetadata           |