Skip to content

Commit 2b20ed6

Browse files
author
Github Executorch
committed
Update base for Update on "Reuse GELU implementation from PyTorch core"
kernels/optimized doesn't need to support embedded systems, so it can just take a header-only dep on PyTorch. Note that, because we will pick up Sleef internally and ignore it externally thanks to ATen vec, this PR gets to enable optimized GELU in OSS. Testing: CI to make sure this doesn't break mobile build modes; happy to take advice on anything not currently covered that might break. Differential Revision: [D66335522](https://our.internmc.facebook.com/intern/diff/D66335522/) [ghstack-poisoned]
2 parents fc56fcd + 81f7c4f commit 2b20ed6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+2063
-527
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0a94bb432ed75cc2d950d81b2921363218a7e459
1+
27e35de6c288bffad1b4d18b393579c1d1a95547

.ci/docker/conda-env-ci.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
cmake=3.22.1
22
ninja=1.10.2
33
libuv
4+
llvm-openmp
45
pkg-config

.ci/scripts/setup-macos.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ setup_macos_env_variables
121121
# NB: we need buck2 in all cases because cmake build also depends on calling
122122
# buck2 atm
123123
install_buck
124+
brew install libomp
124125
install_pip_dependencies
125126

126127
# TODO(huydhn): Unlike our self-hosted runner, GitHub runner doesn't have access

.github/workflows/pull.yml

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -212,17 +212,14 @@ jobs:
212212
docker-image: executorch-ubuntu-22.04-clang12
213213
submodules: 'true'
214214
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
215-
timeout: 90
215+
timeout: 180
216216
script: |
217217
# The generic Linux job chooses to use base env, not the one setup by the image
218218
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
219219
conda activate "${CONDA_ENV}"
220220
221221
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
222222
223-
# install pybind
224-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
225-
226223
# install Llava requirements
227224
bash examples/models/llama/install_requirements.sh
228225
bash examples/models/llava/install_requirements.sh
@@ -483,9 +480,6 @@ jobs:
483480
484481
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
485482
486-
# install pybind
487-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
488-
489483
# install phi-3-mini requirements
490484
bash examples/models/phi-3-mini/install_requirements.sh
491485
@@ -513,9 +507,6 @@ jobs:
513507
514508
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
515509
516-
# install pybind
517-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
518-
519510
# install llama requirements
520511
bash examples/models/llama/install_requirements.sh
521512
@@ -535,17 +526,14 @@ jobs:
535526
docker-image: executorch-ubuntu-22.04-clang12
536527
submodules: 'true'
537528
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
538-
timeout: 90
529+
timeout: 180
539530
script: |
540531
# The generic Linux job chooses to use base env, not the one setup by the image
541532
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
542533
conda activate "${CONDA_ENV}"
543534
544535
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
545536
546-
# install pybind
547-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
548-
549537
# install llama requirements
550538
bash examples/models/llama/install_requirements.sh
551539
@@ -573,9 +561,6 @@ jobs:
573561
574562
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
575563
576-
# install pybind
577-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
578-
579564
# install llama requirements
580565
bash examples/models/llama/install_requirements.sh
581566

.mypy.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ ignore_missing_imports = True
7777
[mypy-ruamel]
7878
ignore_missing_imports = True
7979

80+
[mypy-serializer.*]
81+
ignore_missing_imports = True
82+
8083
[mypy-setuptools.*]
8184
ignore_missing_imports = True
8285

CMakeLists.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ cmake_dependent_option(
240240
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
241241
)
242242

243+
244+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
245+
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
246+
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
247+
set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
248+
endif()
249+
243250
if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
244251
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
245252
set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)
@@ -802,6 +809,35 @@ if(EXECUTORCH_BUILD_PYBIND)
802809
install(TARGETS portable_lib
803810
LIBRARY DESTINATION executorch/extension/pybindings
804811
)
812+
813+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
814+
815+
set(_pybind_training_dep_libs
816+
${TORCH_PYTHON_LIBRARY}
817+
etdump
818+
executorch
819+
util
820+
torch
821+
extension_training
822+
)
823+
824+
if(EXECUTORCH_BUILD_XNNPACK)
825+
# need to explicitly specify XNNPACK and microkernels-prod
826+
# here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
827+
list(APPEND _pybind_training_dep_libs xnnpack_backend XNNPACK microkernels-prod)
828+
endif()
829+
830+
# pybind training
831+
pybind11_add_module(_training_lib SHARED extension/training/pybindings/_training_lib.cpp)
832+
833+
target_include_directories(_training_lib PRIVATE ${TORCH_INCLUDE_DIRS})
834+
target_compile_options(_training_lib PUBLIC ${_pybind_compile_options})
835+
target_link_libraries(_training_lib PRIVATE ${_pybind_training_dep_libs})
836+
837+
install(TARGETS _training_lib
838+
LIBRARY DESTINATION executorch/extension/training/pybindings
839+
)
840+
endif()
805841
endif()
806842

807843
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
<hr>
1313
</div>
1414

15-
**ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI functionality including: Facebook, Oculus, Meta Glasses, Instagram, WhatsApp and more.
15+
**ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI experiences across Facebook, Instagram, Meta Quest, Ray-Ban Meta Smart Glasses, WhatsApp, and more.
1616

17-
It covers a wide gamut of models including: LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), TTS (Text to Speech).
17+
It supports a wide range of models including LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), and TTS (Text to Speech).
1818

1919
Platform Support:
2020
- Operating Systems:
@@ -26,9 +26,10 @@ Platform Support:
2626

2727
- Hardware Acceleration:
2828
- Apple
29-
- ARM
29+
- Arm
3030
- Cadence
3131
- MediaTek
32+
- Qualcomm
3233
- Vulkan
3334
- XNNPACK
3435

backends/arm/_passes/insert_table_ops.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class InsertTableOpsPass(ExportPass):
3131
"""
3232
For ops in self.table_ops they need to be serialized as a TOSA TABLE. This pass replaces these
3333
edge ops with a tosa._table(input: Tensor, target_str: str) where target_str == str(node.target).
34-
When loweringthe _table node target_str will be used to find the corresponding torch operator
34+
When lowering the _table node target_str will be used to find the corresponding torch operator
3535
which will be used to produce the table values in operators/op_table.py.
3636
"""
3737

@@ -43,6 +43,7 @@ class InsertTableOpsPass(ExportPass):
4343
exir_ops.edge.aten.sigmoid.default: torch.sigmoid,
4444
exir_ops.edge.aten.tanh.default: torch.tanh,
4545
exir_ops.edge.aten.hardsigmoid.default: torch.nn.functional.hardsigmoid,
46+
exir_ops.edge.aten.hardswish.default: torch.nn.functional.hardswish,
4647
}
4748

4849
def __init__(self, exported_program: ExportedProgram) -> None:

backends/arm/arm_partitioner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def ops_to_not_decompose(
115115
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
116116
ops_to_not_decompose_if_quant_op = [
117117
torch.ops.aten.hardsigmoid.default,
118+
torch.ops.aten.hardswish.default,
118119
]
119120

120121
def filter_fn(node: torch.fx.Node) -> bool:

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool:
8181
exir_ops.edge.aten.permute_copy.default,
8282
exir_ops.edge.aten.hardsigmoid.default,
8383
exir_ops.edge.aten.hardtanh.default,
84+
exir_ops.edge.aten.hardswish.default,
8485
exir_ops.edge.aten.convolution.default,
8586
exir_ops.edge.aten.div.Tensor,
8687
exir_ops.edge.aten.eq.Tensor,

0 commit comments

Comments
 (0)