extension/training builds in OSS

JacobSzwejbka · JacobSzwejbka · commit d6f58cdb7716 · 2024-09-17T12:45:42.000-07:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -183,6 +183,8 @@ option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL "Build the Runner Util extension"
 
 option(EXECUTORCH_BUILD_EXTENSION_TENSOR "Build the Tensor extension" OFF)
 
+option(EXECUTORCH_BUILD_EXTENSION_TRAINING "Build the training extension" OFF)
+
 option(EXECUTORCH_BUILD_GTESTS "Build googletest based test binaries" OFF)
 
 option(EXECUTORCH_BUILD_MPS "Build the MPS backend" OFF)
@@ -636,6 +638,10 @@ if(EXECUTORCH_BUILD_EXTENSION_MODULE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/module)
 endif()
 
+if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/training)
+endif()
+
 if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
 endif()
diff --git a/build/Utils.cmake b/build/Utils.cmake
@@ -68,6 +68,9 @@ function(executorch_print_configuration_summary)
   message(STATUS "  EXECUTORCH_BUILD_EXTENSION_TENSOR      : "
                  "${EXECUTORCH_BUILD_EXTENSION_TENSOR}"
   )
+  message(STATUS "  EXECUTORCH_BUILD_EXTENSION_TRAINING    : "
+                 "${EXECUTORCH_BUILD_EXTENSION_TRAINING}"
+  )
   message(
     STATUS
       "  EXECUTORCH_BUILD_FLATC                 : ${EXECUTORCH_BUILD_FLATC}"
diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml
@@ -210,6 +210,31 @@ deps = [
   "executorch",
   "executorch_no_prim_ops",
 ]
+
+[targets.extension_training]
+buck_targets = [
+  "//extension/training/module:training_module",
+  "//extension/training/optimizer:sgd",
+]
+filters = [
+  ".cpp$",
+]
+deps = [
+  "executorch",
+  "portable_kernels",
+]
+
+[targets.train_xor]
+buck_targets = [
+  "//extension/training/examples/XOR:train_xor",
+]
+filters = [
+  ".cpp$",
+]
+deps = [
+  "executorch",
+  "portable_kernels",
+]
 # ---------------------------------- extension end ----------------------------------
 # ---------------------------------- binary start ----------------------------------
 
diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake
@@ -48,6 +48,7 @@ set(lib_list
     extension_runner_util
     extension_tensor
     extension_threadpool
+    extension_training
     xnnpack_backend
     XNNPACK
     cpuinfo
diff --git a/extension/training/CMakeLists.txt b/extension/training/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Please keep this file formatted by running:
+# ~~~
+# cmake-format -i CMakeLists.txt
+# ~~~
+
+cmake_minimum_required(VERSION 3.19)
+
+# Source root directory for executorch.
+if(NOT EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
+endif()
+
+# Build the training extension library. Entries of _extension_training__srcs
+# are prefixed with the executorch root to form the paths given to add_library.
+list(TRANSFORM _extension_training__srcs PREPEND "${EXECUTORCH_ROOT}/")
+add_library(extension_training ${_extension_training__srcs})
+target_link_libraries(extension_training executorch_no_prim_ops)
+target_include_directories(extension_training PUBLIC ${EXECUTORCH_ROOT}/..)
+target_compile_options(extension_training PUBLIC ${_common_compile_options})
+
+# Build the XOR training example binary from _train_xor__srcs.
+list(TRANSFORM _train_xor__srcs PREPEND "${EXECUTORCH_ROOT}/")
+add_executable(train_xor ${_train_xor__srcs})
+target_link_libraries(train_xor gflags portable_ops_lib)
+target_compile_options(train_xor PUBLIC ${_common_compile_options})
+
+# Install libraries
+install(
+  TARGETS extension_training
+  DESTINATION lib
+  INCLUDES
+  DESTINATION ${_common_include_directories}
+)
diff --git a/extension/training/__init__.py b/extension/training/__init__.py
diff --git a/extension/training/examples/XOR/TARGETS b/extension/training/examples/XOR/TARGETS
@@ -1,40 +1,8 @@
 # Any targets that should be shared between fbcode and xplat must be defined in
 # targets.bzl. This file can contain fbcode-only targets.
 
-load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
-load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
 load(":targets.bzl", "define_common_targets")
 
 oncall("executorch")
 
 define_common_targets()
-
-python_library(
-    name = "model",
-    srcs = ["model.py"],
-    visibility = [],  # Private
-    deps = [
-        "//caffe2:torch",
-    ],
-)
-
-python_library(
-    name = "export_model_lib",
-    srcs = ["export_model_lib.py"],
-    visibility = [],
-    deps = [
-        ":model",
-        "//caffe2:torch",
-        "//executorch/exir:lib",
-    ],
-)
-
-python_binary(
-    name = "export_model",
-    main_function = ".export_model.main",
-    main_src = "export_model.py",
-    deps = [
-        ":export_model_lib",
-        "//caffe2:torch",
-    ],
-)
diff --git a/extension/training/examples/XOR/export_model.py b/extension/training/examples/XOR/export_model.py
@@ -8,9 +8,14 @@
 
 import argparse
 
+import os
+
 import torch
+from executorch.exir import to_edge
 
-from .export_model_lib import export_model
+from executorch.extension.training.examples.XOR.model import Net, TrainingNet
+from torch.export._trace import _export
+from torch.export.experimental import _export_forward_backward
 
 
 def main() -> None:
@@ -26,7 +31,27 @@ def main() -> None:
         help="Path to the directory to write xor.pte files to",
     )
     args = parser.parse_args()
-    export_model(args.outdir)
+
+    net = TrainingNet(Net())
+    x = torch.randn(1, 2)
+
+    # Captures the forward graph. The graph will look similar to the model definition now.
+    # Will move to export_for_training soon which is the api planned to be supported in the long term.
+    ep = _export(net, (x, torch.ones(1, dtype=torch.int64)), pre_dispatch=True)
+    # Captures the backward graph. The exported_program now contains the joint forward and backward graph.
+    ep = _export_forward_backward(ep)
+    # Lower the graph to edge dialect.
+    ep = to_edge(ep)
+    # Lower the graph to executorch.
+    ep = ep.to_executorch()
+
+    # Write out the .pte file.
+    os.makedirs(args.outdir, exist_ok=True)
+    outfile = os.path.join(args.outdir, "xor.pte")
+    with open(outfile, "wb") as fp:
+        fp.write(
+            ep.buffer,
+        )
 
 
 if __name__ == "__main__":
diff --git a/extension/training/examples/XOR/targets.bzl b/extension/training/examples/XOR/targets.bzl
@@ -21,3 +21,31 @@ def define_common_targets():
         external_deps = ["gflags"],
         define_static_target = True,
     )
+
+    runtime.python_library(
+        name = "model",
+        srcs = ["model.py"],
+        visibility = [],  # Private
+        deps = [
+            "//caffe2:torch",
+        ],
+    )
+
+    runtime.python_library(
+        name = "export_model_lib",
+        srcs = ["export_model_lib.py", "export_model.py"],
+        visibility = [],
+        deps = [
+            ":model",
+            "//caffe2:torch",
+            "//executorch/exir:lib",
+        ],
+    )
+
+    runtime.python_binary(
+        name = "export_model",
+        main_module = "executorch.extension.training.examples.XOR.export_model",
+        deps = [
+            ":export_model_lib",
+        ],
+    )
diff --git a/extension/training/optimizer/sgd.cpp b/extension/training/optimizer/sgd.cpp
@@ -7,15 +7,14 @@
  */
 
 #include <executorch/extension/training/optimizer/sgd.h>
-#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/portable/NativeFunctions.h>
 
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/kernel/kernel_runtime_context.h>
 
 using exec_aten::Tensor;
 using exec_aten::TensorImpl;
 using ::executorch::runtime::Error;
-using ::executorch::runtime::KernelRuntimeContext;
 
 namespace executorch {
 namespace extension {
@@ -73,10 +72,7 @@ Error SGD::step(const std::map<exec_aten::string_view, exec_aten::Tensor>&
         auto p = param_iter->second;
         if (weight_decay != 0) {
           // uses weight_decay specified and adds it to the gradient
-          torch::executor::aten::add_outf(context, d_p, p, weight_decay, d_p);
-          if (context.failure_state() != Error::Ok) {
-            return context.failure_state();
-          }
+          torch::executor::native::add_out(d_p, p, weight_decay, d_p);
         }
         if (momentum != 0) {
           Tensor buf(nullptr);
@@ -100,11 +96,8 @@ Error SGD::step(const std::map<exec_aten::string_view, exec_aten::Tensor>&
                 const_cast<TensorImpl::DimOrderType*>(d_p.dim_order().data()));
             buf = Tensor(buf_impl);
 #endif
-            torch::executor::aten::clone_outf(
-                context, d_p, exec_aten::MemoryFormat::Contiguous, buf);
-            if (context.failure_state() != Error::Ok) {
-              return context.failure_state();
-            }
+            torch::executor::native::clone_out(
+                d_p, exec_aten::MemoryFormat::Contiguous, buf);
 
             // save the state of the momentum buffer to be reused in later
             // epochs
@@ -115,31 +108,18 @@ Error SGD::step(const std::map<exec_aten::string_view, exec_aten::Tensor>&
                       .momentum_buffer();
 
             // update the momentum buffer and apply dampening
-            torch::executor::aten::mul_outf(context, buf, momentum, buf);
-            if (context.failure_state() != Error::Ok) {
-              return context.failure_state();
-            }
-            torch::executor::aten::add_outf(
-                context, buf, d_p, 1 - dampening, buf);
-            if (context.failure_state() != Error::Ok) {
-              return context.failure_state();
-            }
+            torch::executor::native::mul_out(buf, momentum, buf);
+            torch::executor::native::add_out(buf, d_p, 1 - dampening, buf);
           }
           if (nesterov) {
             // apply nesterov momentum
-            torch::executor::aten::add_outf(context, d_p, buf, momentum, d_p);
-            if (context.failure_state() != Error::Ok) {
-              return context.failure_state();
-            }
+            torch::executor::native::add_out(d_p, buf, momentum, d_p);
           } else {
             d_p = buf;
           }
         }
         // update the parameter using the gradient and learning rate
-        torch::executor::aten::add_outf(context, p, d_p, -1 * options.lr(), p);
-        if (context.failure_state() != Error::Ok) {
-          return context.failure_state();
-        }
+        torch::executor::native::add_out(p, d_p, -1 * options.lr(), p);
       }
     }
   }
diff --git a/extension/training/optimizer/targets.bzl b/extension/training/optimizer/targets.bzl
@@ -22,7 +22,6 @@ def define_common_targets():
                 "//executorch/kernels/portable/cpu:op_mul",
                 "//executorch/kernels/portable/cpu:op_clone",
                 "//executorch/kernels/portable:generated_lib_headers",
-                "//executorch/kernels/test:function_header_wrapper_portable",
             ]
 
         runtime.cxx_library(
@@ -34,7 +33,6 @@ def define_common_targets():
                 "sgd.h",
             ],
             exported_deps = [
-                "//executorch/runtime/kernel:kernel_runtime_context" + aten_suffix,
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
             ] + kernel_deps,
             visibility = [