
Commit d2bfd97

angelayi authored and pytorchmergebot committed
[export] Refactor pt2 save/load (pytorch#152495)
Refactor the pt2 archive saving to consolidate the format of torch.export.save and torch._inductor.package.package_aoti. This PR adds the following functions, which torch.export.save and AOTI packaging call into:

```python
package_pt2(
    f: FileLike,
    *,
    exported_programs: Optional[Union[ExportedProgram, dict[str, ExportedProgram]]] = None,
    aoti_files: Optional[Union[list[str], dict[str, list[str]]]] = None,
    extra_files: Optional[dict[str, Any]] = None,
) -> FileLike

@dataclass
class PT2ArchiveContents:
    exported_programs: dict[str, ExportedProgram]
    aoti_runners: dict[str, AOTICompiledModel]
    extra_files: dict[str, Any]

load_pt2(f: FileLike) -> PT2ArchiveContents
```

Power users can call directly into these APIs if they want to bundle multiple exported programs, AOTI files, or extra metadata.

This is what the pt2 archive looks like ([spec](https://docs.google.com/document/d/1RQ4cmywilnFUT1VE-4oTGxwXdc8vowCSZsrRgo3wFA8/edit?tab=t.0)):

```
├── archive_format
├── version
├── .data
├── data
│   ├── aotinductor
│   │   └── model1
│   │       ├── model1.cpp
│   │       ├── model1.so  # currently AOTI automatically moves weights in here, TODO to move it out
│   │       ├── cg7domx3woam3nnliwud7yvtcencqctxkvvcafuriladwxw4nfiv.cubin
│   │       └── cubaaxppb6xmuqdm4bej55h2pftbce3bjyyvljxbtdfuolmv45ex.cubin
│   ├── weights
│   │   ├── model1.pt  # TODO to dedup weights between model1/model2
│   │   └── model2.pt
│   ├── constants
│   │   ├── model1.pt  # TODO to dedup weights between model1/model2
│   │   └── model2.pt
│   └── sample_inputs
│       ├── model1.pt  # TODO to dedup weights between model1/model2
│       └── model2.pt
├── extra
│   └── user_metadata.txt
└── models
    ├── model1.json
    └── model2.json
```

Future TODOs:
- Unbundle the weights: instead of .pt files we can use bin files, which will also let us dedup weights when multiple models are stored.
- Update aoti_compile_and_package to also save the exported program.
- Integrate TNR with this packaging flow.

Pull Request resolved: pytorch#152495
Approved by: https://github.com/yushangdi
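For illustration, a minimal sketch of the power-user flow described above (the module definitions and the `bundle.pt2` path are hypothetical; the keyword names follow the signatures added in this PR):

```python
import torch
from torch.export.pt2_archive._package import load_pt2, package_pt2


class A(torch.nn.Module):
    def forward(self, x):
        return x + 1


class B(torch.nn.Module):
    def forward(self, x):
        return x - 1


inp = (torch.randn(2, 2),)

# Bundle two exported programs plus user metadata into a single pt2 archive.
package_pt2(
    "bundle.pt2",
    exported_programs={
        "model1": torch.export.export(A(), inp),
        "model2": torch.export.export(B(), inp),
    },
    extra_files={"user_metadata.txt": "hello"},
)

# Load everything back as a PT2ArchiveContents dataclass.
contents = load_pt2("bundle.pt2")
ep1 = contents.exported_programs["model1"]
assert contents.extra_files["user_metadata.txt"] == "hello"
```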
1 parent 75b24c2 commit d2bfd97

File tree

5 files changed: +472 -167 lines changed

test/export/test_serialize.py

Lines changed: 17 additions & 9 deletions
```diff
@@ -34,9 +34,12 @@
 from torch._higher_order_ops.torchbind import enable_torchbind_tracing
 from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode
 from torch.export import Dim, export_for_training, load, save, unflatten
+from torch.export.pt2_archive.constants import ARCHIVE_VERSION_PATH
 from torch.fx.experimental.symbolic_shapes import is_concrete_int, ValueRanges
 from torch.testing._internal.common_utils import (
     instantiate_parametrized_tests,
+    IS_FBCODE,
+    IS_MACOS,
     IS_WINDOWS,
     parametrize,
     run_tests,
@@ -1491,6 +1494,7 @@ def forward(self, x):

         self.assertTrue(torch.allclose(ep.module()(*inp), loaded_ep.module()(*inp)))

+    @unittest.skipIf(IS_WINDOWS, "Cannot modify file in windows")
     def test_save_file(self):
         class Foo(torch.nn.Module):
             def forward(self, x):
@@ -1501,10 +1505,10 @@ def forward(self, x):
         inp = (torch.randn(2, 2),)
         ep = export_for_training(f, inp, strict=True)

-        with tempfile.NamedTemporaryFile() as f:
-            save(ep, f)
+        with tempfile.NamedTemporaryFile(suffix=".pt2") as f:
+            save(ep, f.name)
             f.seek(0)
-            loaded_ep = load(f)
+            loaded_ep = load(f.name)

         self.assertTrue(torch.allclose(ep.module()(*inp), loaded_ep.module()(*inp)))

@@ -1518,7 +1522,7 @@ def forward(self, x, y):
         inp = (torch.tensor([6]), torch.tensor([7]))
         ep = export_for_training(f, inp, strict=True)

-        with TemporaryFileName() as fname:
+        with TemporaryFileName(suffix=".pt2") as fname:
             path = Path(fname)
             save(ep, path)
             loaded_ep = load(path)
@@ -1545,6 +1549,9 @@ def forward(self, x):
         self.assertTrue(torch.allclose(ep.module()(*inp), loaded_ep.module()(*inp)))
         self.assertEqual(extra_files["extra.txt"], "moo")

+    @unittest.skipIf(
+        IS_FBCODE or IS_MACOS or IS_WINDOWS, "The file path is different in fbcode CI"
+    )
     def test_version_error(self):
         class Foo(torch.nn.Module):
             def forward(self, x):
@@ -1555,18 +1562,19 @@ def forward(self, x):
         ep = export_for_training(f, (torch.randn(1, 3),), strict=True)

         with self.assertRaisesRegex(
-            RuntimeError, r"Serialized version .* does not match our current"
+            ValueError, r"Saved archive version -1 does not match our current"
         ):
-            with tempfile.NamedTemporaryFile() as f:
-                save(ep, f)
+            with tempfile.NamedTemporaryFile(suffix=".pt2") as f:
+                save(ep, f.name)
                 f.seek(0)
+                file_prefix = f.name.split("/")[2].split(".")[0]

                 # Modify the version
                 with zipfile.ZipFile(f, "a") as zipf:
-                    zipf.writestr("version", "-1.1")
+                    zipf.writestr(f"{file_prefix}/{ARCHIVE_VERSION_PATH}", "-1")

                 f.seek(0)
-                load(f)
+                load(f.name)

     def test_save_constants(self):
         class Foo(torch.nn.Module):
```

torch/_C/__init__.pyi.in

Lines changed: 1 addition & 0 deletions
```diff
@@ -1561,6 +1561,7 @@ class PyTorchFileReader:
     @overload
     def __init__(self, buffer: IO[bytes]) -> None: ...
     def get_record(self, name: str) -> bytes: ...
+    def get_all_records(self) -> list[str]: ...
     def serialization_id(self) -> str: ...

 class PyTorchFileWriter:
```
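A quick sketch of the new `get_all_records` binding, handy for inspecting what a pt2 archive contains (the `bundle.pt2` path is hypothetical):

```python
import torch

# PyTorchFileReader wraps the zip-based container format used by pt2 archives.
reader = torch._C.PyTorchFileReader("bundle.pt2")
for record_name in reader.get_all_records():  # every record path in the archive
    print(record_name)
```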

torch/_inductor/package/package.py

Lines changed: 16 additions & 131 deletions
```diff
@@ -3,20 +3,12 @@
 import logging
 import os
 import tempfile
-from typing import Any, IO, Optional, Union
+from typing import IO, Union

 import torch
-import torch._inductor
-import torch.utils._pytree as pytree
 from torch._inductor import config
 from torch._inductor.cpp_builder import BuildOptionsBase, CppBuilder
-from torch.export._tree_utils import reorder_kwargs
-from torch.export.pt2_archive._package import PT2ArchiveWriter
-from torch.export.pt2_archive.constants import (
-    AOTINDUCTOR_DIR,
-    CONSTANTS_DIR,
-    CUSTOM_OBJ_FILENAME_PREFIX,
-)
+from torch.export.pt2_archive._package import AOTICompiledModel, load_pt2, package_pt2
 from torch.types import FileLike


@@ -95,122 +87,8 @@ def package_aoti(
         the AOTInductor files, or a dictionary mapping the model name to the
         path to its AOTInductor generated files.
     """
-    if isinstance(aoti_files, list):
-        aoti_files = {"model": aoti_files}
-
-    assert isinstance(aoti_files, dict), (
-        "Please pass a list of AOTI generated files to be packaged or "
-        "a dictionary mapping model names to their list of AOTI generated "
-        "files. You can get this list of files through calling "
-        "`torch._inductor.aot_compile(..., options={aot_inductor.package=True})`"
-    )
-    assert (
-        isinstance(archive_file, (io.IOBase, IO))
-        and archive_file.writable()
-        and archive_file.seekable()
-    ) or (
-        isinstance(archive_file, (str, os.PathLike))
-        and os.fspath(archive_file).endswith(".pt2")
-    ), (
-        f"Expect archive file to be a file ending in .pt2, or is a buffer. Instead got {archive_file}"
-    )

-    # Save using the PT2 packaging format
-    # (https://docs.google.com/document/d/1jLPp8MN8Whs0-VW9PmJ93Yg02W85tpujvHrTa1pc5x8/edit#heading=h.v2y2jgnwc56a)
-
-    with PT2ArchiveWriter(archive_file) as archive_writer:
-        for model_name, files in aoti_files.items():
-            num_so_files = 0
-            num_cpp_files = 0
-
-            for file in files:
-                if file == "":
-                    continue
-
-                if file.endswith(".so"):
-                    num_so_files += 1
-                    if num_so_files > 1:
-                        raise RuntimeError(
-                            f"Multiple .so files found in {files}. "
-                            "You might need to clear your cache "
-                            "directory before calling aoti_compile again."
-                        )
-                if file.endswith(".cpp"):
-                    num_cpp_files += 1
-                    if num_so_files > 1:
-                        raise RuntimeError(
-                            f"Multiple .cpp files found in {files}. "
-                            "You might need to clear your cache "
-                            "directory before calling aoti_compile again."
-                        )
-
-                filename = os.path.basename(file)
-                if filename.startswith(CUSTOM_OBJ_FILENAME_PREFIX):
-                    new_filepath = os.path.join(CONSTANTS_DIR, filename)
-                else:
-                    new_filepath = os.path.join(AOTINDUCTOR_DIR, model_name, filename)
-                log.debug(
-                    "Saving AOTI generated file %s to archive in %s", file, new_filepath
-                )
-                archive_writer.write_file(
-                    str(new_filepath),
-                    file,
-                )
-
-    if isinstance(archive_file, (io.IOBase, IO)):
-        archive_file.seek(0)
-    return archive_file
-
-
-class AOTICompiledModel:
-    """
-    Callable AOT Inductor loaded model from a .pt2
-    """
-
-    def __init__(self, loader: torch._C._aoti.AOTIModelPackageLoader) -> None:
-        self.loader = loader
-
-    def __call__(self, *args, **kwargs):  # type: ignore[no-untyped-def]
-        call_spec = self.loader.get_call_spec()  # type: ignore[attr-defined]
-        in_spec = pytree.treespec_loads(call_spec[0])
-        out_spec = pytree.treespec_loads(call_spec[1])
-        flat_inputs = pytree.tree_flatten((args, reorder_kwargs(kwargs, in_spec)))[0]
-        flat_inputs = [x for x in flat_inputs if isinstance(x, torch.Tensor)]
-        flat_outputs = self.loader.boxed_run(flat_inputs)  # type: ignore[attr-defined]
-        return pytree.tree_unflatten(flat_outputs, out_spec)
-
-    def get_metadata(self) -> dict[str, str]:
-        return self.loader.get_metadata()  # type: ignore[attr-defined]
-
-    def load_constants(
-        self,
-        constants_map: dict[str, torch.Tensor],
-        *,
-        check_full_update: bool,
-        user_managed: bool = False,
-    ) -> None:
-        """
-        Given a mapping of constant fqns to tensors, load the constants into the model.
-        You can use ``get_constant_fqns`` to get the list of constant fqns that
-        are needed in the compiled model.
-
-        Args:
-            constants_map: A mapping of constant fqns to tensors.
-            check_full_update: Whether to add check to see if all the constants
-                are updated and have values.
-        """
-        self.loader.load_constants(  # type: ignore[attr-defined]
-            constants_map, False, check_full_update, user_managed
-        )
-
-    def get_constant_fqns(self) -> list[str]:
-        return self.loader.get_constant_fqns()  # type: ignore[attr-defined]
-
-    def __deepcopy__(self, memo: Optional[dict[Any, Any]]) -> "AOTICompiledModel":
-        log.warning(
-            "AOTICompiledModel deepcopy warning: AOTICompiledModel.loader is not deepcopied."
-        )
-        return AOTICompiledModel(self.loader)  # type: ignore[attr-defined]
+    return package_pt2(archive_file, aoti_files=aoti_files)


 def load_package(
@@ -220,18 +98,25 @@ def load_package(
     num_runners: int = 1,
     device_index: int = -1,
 ) -> AOTICompiledModel:  # type: ignore[type-arg]
-    assert (
-        isinstance(path, (io.IOBase, IO)) and path.readable() and path.seekable()
-    ) or (isinstance(path, (str, os.PathLike)) and os.fspath(path).endswith(".pt2")), (
-        f"Unable to load package. Path must be a buffer or a file ending in .pt2. Instead got {path}"
-    )
+    try:
+        pt2_contents = load_pt2(
+            path,
+            run_single_threaded=run_single_threaded,
+            num_runners=num_runners,
+            device_index=device_index,
+        )
+        if model_name not in pt2_contents.aoti_runners:
+            raise RuntimeError(f"Model {model_name} not found in package")
+        return pt2_contents.aoti_runners[model_name]
+    except RuntimeError:
+        log.warning("Loading outdated pt2 file. Please regenerate your package.")

     if isinstance(path, (io.IOBase, IO)):
         with tempfile.NamedTemporaryFile(suffix=".pt2") as f:
             # TODO(angelayi): We shouldn't need to do this -- miniz should
             # handle reading the buffer. This is just a temporary workaround
-            f.write(path.read())
             path.seek(0)
+            f.write(path.read())
             log.debug("Writing buffer to tmp file located at %s.", f.name)
             loader = torch._C._aoti.AOTIModelPackageLoader(
                 f.name, model_name, run_single_threaded, num_runners, device_index
```
torch/export/__init__.py

Lines changed: 32 additions & 24 deletions
```diff
@@ -381,29 +381,15 @@ def forward(self, x):
             f"The 'ep' parameter must be an instance of 'ExportedProgram', got '{type(ep).__name__}' instead."
         )

-    from torch._export.serde.schema import SCHEMA_VERSION
-    from torch._export.serde.serialize import serialize, SerializedArtifact
-
-    artifact: SerializedArtifact = serialize(ep, opset_version, pickle_protocol)
-
-    if isinstance(f, (str, os.PathLike)):
-        f = os.fspath(f)
-
-    with zipfile.ZipFile(f, "w") as zipf:
-        # Save every field in the SerializedArtifact to a file.
-        assert isinstance(artifact.exported_program, bytes)
-        zipf.writestr("serialized_exported_program.json", artifact.exported_program)
-        zipf.writestr("serialized_state_dict.pt", artifact.state_dict)
-        zipf.writestr("serialized_constants.pt", artifact.constants)
-        zipf.writestr("serialized_example_inputs.pt", artifact.example_inputs)
-
-        zipf.writestr("version", ".".join(map(str, SCHEMA_VERSION)))
-
-        # Add extra files if provided
-        if extra_files:
-            for extra_file_name, content in extra_files.items():
-                encoded_content = content.encode("utf-8")
-                zipf.writestr(f"extra_files/{extra_file_name}", encoded_content)
+    from torch.export.pt2_archive._package import package_pt2
+
+    package_pt2(
+        f,
+        exported_programs={"model": ep},
+        extra_files=extra_files,
+        pickle_protocol=pickle_protocol,
+        opset_version=opset_version,
+    )


 def load(
@@ -460,10 +446,32 @@ def load(

     extra_files = extra_files or {}

+    from torch.export.pt2_archive._package import load_pt2, PT2ArchiveContents
+
+    try:
+        pt2_contents = load_pt2(
+            f,
+            expected_opset_version=expected_opset_version,
+        )
+    except RuntimeError:
+        pt2_contents = PT2ArchiveContents({}, {}, {})
+
+    if len(pt2_contents.exported_programs) > 0 or len(pt2_contents.extra_files) > 0:
+        for k, v in pt2_contents.extra_files.items():
+            extra_files[k] = v
+
+        return pt2_contents.exported_programs["model"]
+
+    # TODO: For backward compatibility, we support loading a zip file from 2.7. Delete this path in 2.9(?)
+    warnings.warn(
+        "This version of file is deprecated. Please generate a new pt2 saved file."
+    )
     with zipfile.ZipFile(f, "r") as zipf:
         # Check the version
         version = zipf.read("version").decode().split(".")
-        from torch._export.serde.schema import SCHEMA_VERSION
+        from torch._export.serde.schema import (
+            SCHEMA_VERSION,  # todo change archive version to schema version
+        )

         assert len(version) == len(SCHEMA_VERSION)
         if version[0] != str(SCHEMA_VERSION[0]):
```
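On the `torch.export` side the public API is unchanged; `save` and `load` simply route through the pt2 archive helpers, including `extra_files` handling. A small sketch (the module, path, and file contents are illustrative):

```python
import torch
from torch.export import export, load, save


class M(torch.nn.Module):
    def forward(self, x):
        return x.sin()


ep = export(M(), (torch.randn(3),))

# extra_files entries end up under extra/ in the archive (see the layout above).
save(ep, "model.pt2", extra_files={"user_metadata.txt": "trained on v2 data"})

received: dict[str, str] = {}
loaded_ep = load("model.pt2", extra_files=received)
assert received["user_metadata.txt"] == "trained on v2 data"
```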
