Add XOR Model Example (pytorch#5397)

JacobSzwejbka · facebook-github-bot · commit 7c661d727dc0 · 2024-09-16T16:12:55.000-07:00
Summary: Pull Request resolved: pytorch#5397. Add a very basic model/use case to showcase the full training loop, from model definition to optimizer.step(). Reviewed By: iseeyuan Differential Revision: D62771102 fbshipit-source-id: 48dc01f680085e3192aa4b91396f80ca646d1640
diff --git a/extension/training/examples/XOR/TARGETS b/extension/training/examples/XOR/TARGETS
@@ -0,0 +1,40 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
+load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
+
+# Library wrapping model.py (the XOR model definitions).
+python_library(
+    name = "model",
+    srcs = ["model.py"],
+    visibility = [],  # Private
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
+# Library wrapping export_model_lib.py; builds on :model and the exir library.
+python_library(
+    name = "export_model_lib",
+    srcs = ["export_model_lib.py"],
+    visibility = [],  # Private
+    deps = [
+        ":model",
+        "//caffe2:torch",
+        "//executorch/exir:lib",
+    ],
+)
+
+# Command-line binary; its entry point is main() in export_model.py.
+python_binary(
+    name = "export_model",
+    main_function = ".export_model.main",
+    main_src = "export_model.py",
+    deps = [
+        ":export_model_lib",
+        "//caffe2:torch",
+    ],
+)
diff --git a/extension/training/examples/XOR/export_model.py b/extension/training/examples/XOR/export_model.py
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import argparse
+
+import torch
+
+from .export_model_lib import export_model
+
+
+def main() -> None:
+    """Command-line entry point: export the XOR model into ``--outdir``.
+
+    The torch RNG is seeded with 0 before anything else runs, then the
+    required ``--outdir`` argument is parsed and handed to ``export_model``.
+    """
+    torch.manual_seed(0)
+
+    arg_parser = argparse.ArgumentParser(
+        description="Exports an nn.Module model to ExecuTorch .pte files",
+        prog="export_model",
+    )
+    arg_parser.add_argument(
+        "--outdir",
+        help="Path to the directory to write xor.pte files to",
+        required=True,
+        type=str,
+    )
+    parsed = arg_parser.parse_args()
+
+    export_model(parsed.outdir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/extension/training/examples/XOR/export_model_lib.py b/extension/training/examples/XOR/export_model_lib.py
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import os
+
+import torch
+from executorch.exir import to_edge
+from executorch.extension.training.examples.XOR.model import TrainingNet
+from torch.export._trace import _export
+from torch.export.experimental import _export_forward_backward
+
+from .model import Net
+
+
+def export_model(outdir):
+    """Export the XOR training model and write it to ``<outdir>/xor.pte``.
+
+    ``outdir`` is created first if it does not exist yet.
+    """
+    # Build the model before drawing the example input so the random number
+    # generator is consumed in the same order on every run.
+    training_net = TrainingNet(Net())
+    example_input = torch.randn(1, 2)
+    example_label = torch.ones(1, dtype=torch.int64)
+
+    # Forward graph only; at this point it still resembles the model definition.
+    # Will move to export_for_training soon, which is the API planned to be
+    # supported in the long term.
+    forward_program = _export(
+        training_net, (example_input, example_label), pre_dispatch=True
+    )
+    # Joint forward + backward graph.
+    joint_program = _export_forward_backward(forward_program)
+    # Edge dialect.
+    edge_program = to_edge(joint_program)
+    # ExecuTorch program.
+    executorch_program = edge_program.to_executorch()
+
+    # Serialize the program to the .pte file.
+    os.makedirs(outdir, exist_ok=True)
+    pte_path = os.path.join(outdir, "xor.pte")
+    with open(pte_path, "wb") as pte_file:
+        pte_file.write(executorch_program.buffer)
diff --git a/extension/training/examples/XOR/model.py b/extension/training/examples/XOR/model.py
@@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch.nn as nn
+from torch.nn import functional as F
+
+
+# Basic Net for XOR
+class Net(nn.Module):
+    """Two linear layers with a sigmoid in between: 2 -> 10 -> 2."""
+
+    def __init__(self):
+        super().__init__()
+        self.linear = nn.Linear(2, 10)
+        self.linear2 = nn.Linear(10, 2)
+
+    def forward(self, x):
+        """Apply ``linear``, then sigmoid, then ``linear2`` to ``x``."""
+        hidden = self.linear(x)
+        activated = F.sigmoid(hidden)
+        return self.linear2(activated)
+
+
+# On device training requires the loss to be embedded in the model (and be the first output).
+# We wrap the original model here and add the loss calculation. This will be the model we export.
+class TrainingNet(nn.Module):
+    """Wraps ``net`` so that forward returns ``(loss, predicted class)``."""
+
+    def __init__(self, net):
+        super().__init__()
+        self.net = net
+        self.loss = nn.CrossEntropyLoss()
+
+    def forward(self, input, label):
+        """Run the wrapped net and return the loss first, then the prediction.
+
+        The loss is the cross entropy between the net's output and ``label``.
+        The prediction is the argmax over dim 1 of the detached output.
+        """
+        logits = self.net(input)
+        loss_value = self.loss(logits, label)
+        predicted = logits.detach().argmax(dim=1)
+        return loss_value, predicted
diff --git a/extension/training/examples/XOR/targets.bzl b/extension/training/examples/XOR/targets.bzl
@@ -0,0 +1,23 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    """Declares the XOR example targets common to fbcode and xplat.
+
+    Both the TARGETS and the BUCK file located next to this targets.bzl are
+    expected to call this function.
+    """
+
+    # Libraries that train.cpp is built against.
+    train_xor_deps = [
+        "//executorch/extension/training/module:training_module",
+        "//executorch/extension/tensor:tensor",
+        "//executorch/extension/training/optimizer:sgd",
+        "//executorch/runtime/executor:program",
+        "//executorch/extension/data_loader:file_data_loader",
+        "//executorch/kernels/portable:generated_lib",
+    ]
+
+    runtime.cxx_binary(
+        name = "train_xor",
+        srcs = ["train.cpp"],
+        deps = train_xor_deps,
+        external_deps = ["gflags"],
+        define_static_target = True,
+    )
diff --git a/extension/training/examples/XOR/train.cpp b/extension/training/examples/XOR/train.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/extension/data_loader/file_data_loader.h>
+#include <executorch/extension/tensor/tensor.h>
+#include <executorch/extension/training/module/training_module.h>
+#include <executorch/extension/training/optimizer/sgd.h>
+#include <gflags/gflags.h>
+
+#include <cinttypes>
+#include <random>
+
+#pragma clang diagnostic ignored \
+    "-Wbraced-scalar-init" // {0} below upsets clang.
+
+using executorch::extension::FileDataLoader;
+using executorch::extension::training::optimizer::SGD;
+using executorch::extension::training::optimizer::SGDOptions;
+using executorch::runtime::Error;
+using executorch::runtime::Result;
+DEFINE_string(model_path, "xor.pte", "Model serialized in flatbuffer format.");
+
+// Trains the XOR model stored at --model_path with SGD, feeding one randomly
+// chosen sample per step and logging progress every 500 steps.
+// Returns 0 on success and 1 on any failure (extra command-line arguments,
+// unreadable model file, or a failing training call).
+int main(int argc, char** argv) {
+  using executorch::extension::make_tensor_ptr;
+  using executorch::extension::TensorPtr;
+  using executorch::extension::training::TrainingModule;
+
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  if (argc != 1) {
+    std::string msg = "Extra commandline args:";
+    for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
+      // Separate the arguments so they do not run together in the log line.
+      msg += " ";
+      msg += argv[i];
+    }
+    ET_LOG(Error, "%s", msg.c_str());
+    return 1;
+  }
+
+  // Load the model file.
+  Result<FileDataLoader> loader_res =
+      FileDataLoader::from(FLAGS_model_path.c_str());
+  if (loader_res.error() != Error::Ok) {
+    ET_LOG(Error, "Failed to open model file: %s", FLAGS_model_path.c_str());
+    return 1;
+  }
+  auto loader = std::make_unique<FileDataLoader>(std::move(loader_res.get()));
+
+  TrainingModule mod(std::move(loader));
+
+  // Create full data set of input and labels.
+  // Labels are created as int64_t because that is the type they are read back
+  // as when logging below; `long` is not 64 bits wide on every platform.
+  std::vector<std::pair<TensorPtr, TensorPtr>> data_set;
+  data_set.push_back( // XOR(1, 1) = 0
+      {make_tensor_ptr<float>({1, 2}, {1, 1}),
+       make_tensor_ptr<int64_t>({1}, {0})});
+  data_set.push_back( // XOR(0, 0) = 0
+      {make_tensor_ptr<float>({1, 2}, {0, 0}),
+       make_tensor_ptr<int64_t>({1}, {0})});
+  data_set.push_back( // XOR(1, 0) = 1
+      {make_tensor_ptr<float>({1, 2}, {1, 0}),
+       make_tensor_ptr<int64_t>({1}, {1})});
+  data_set.push_back( // XOR(0, 1) = 1
+      {make_tensor_ptr<float>({1, 2}, {0, 1}),
+       make_tensor_ptr<int64_t>({1}, {1})});
+
+  // Create optimizer.
+  // Get the params and names
+  auto param_res = mod.named_parameters("forward");
+  if (param_res.error() != Error::Ok) {
+    ET_LOG(Error, "Failed to get named parameters");
+    return 1;
+  }
+
+  SGDOptions options{0.1};
+  SGD optimizer(param_res.get(), options);
+
+  // Randomness to sample the data set.
+  std::default_random_engine URBG{std::random_device{}()};
+  std::uniform_int_distribution<int> dist{
+      0, static_cast<int>(data_set.size()) - 1};
+
+  // Train the model. Each iteration is one optimizer step on a single sample.
+  // The count is an int so it matches the loop counter and the %d below.
+  const int num_epochs = 5000;
+  for (int i = 0; i < num_epochs; i++) {
+    int index = dist(URBG);
+    auto& data = data_set[index];
+    const auto& results = mod.execute_forward_backward(
+        "forward", {*data.first.get(), *data.second.get()});
+    if (results.error() != Error::Ok) {
+      ET_LOG(Error, "Failed to execute forward_backward");
+      return 1;
+    }
+    if (i % 500 == 0 || i == num_epochs - 1) {
+      // Output 0 is read as the float loss and output 1 as the int64
+      // prediction. PRId64 is the portable format for int64_t.
+      ET_LOG(
+          Info,
+          "Step %d, Loss %f, Input [%.0f, %.0f], Prediction %" PRId64
+          ", Label %" PRId64,
+          i,
+          results.get()[0].toTensor().const_data_ptr<float>()[0],
+          data.first->const_data_ptr<float>()[0],
+          data.first->const_data_ptr<float>()[1],
+          results.get()[1].toTensor().const_data_ptr<int64_t>()[0],
+          data.second->const_data_ptr<int64_t>()[0]);
+    }
+    // Check the gradient lookup before using it, like every other Result here.
+    const auto& grad_res = mod.named_gradients("forward");
+    if (grad_res.error() != Error::Ok) {
+      ET_LOG(Error, "Failed to get named gradients");
+      return 1;
+    }
+    optimizer.step(grad_res.get());
+  }
+  return 0;
+}