
Commit 03fda11

Update base for Update on "[XNNPACK][Weights Cache] Enable in XNNPACK"
We enable the XNNPACK weights cache in the XNNPACK backend. The weights cache is initialized for the runtime with the named data map and a memory allocator (for now the memory allocator is not used, but in the future it could be used to manage the memory for packed weights).

Before creating the runtime, we first initialize the weights cache, which sets the finalization state to false. As we add weight/bias tensors to the graph, we load them through the named data map in the weights cache and keep a map from pointer to name. When XNNPACK creates the runtime and packs the weights, it uses the weights cache's look_up_or_insert method. We use the pointers provided in the cache key to look up their names and concatenate them into a single key (e.g. "weightsbias"), then insert the packed weights under that key. On future look-ups we reuse the pointer cached under the packed tensor key, saving us from packing again.

After creating the runtime and packing the weights, we finalize the cache, which sets is_finalized to true, and free all unpacked buffers loaded from the named data map since they are no longer needed. We keep reference counts for the packed weights, incrementing the counts of the packed weights used by this runtime, and return a vector of all the packed weight names to the XNNExecutor. When the XNNExecutor is destroyed, we decrement the counts of the packed buffers and destroy them if necessary.

Note that this feature is gated behind the XNN_ENABLE_WEIGHTS_CACHE flag.

Since the weights cache is a global member of the singleton XNNPACK backend class, and it is read/write, we add a mutex to ensure that access to it is thread safe. With the new mutex, the lock hierarchy is: workspace_mutex_ -> weights_cache_mutex_

Differential Revision: [D70885926](https://our.internmc.facebook.com/intern/diff/D70885926/)

[ghstack-poisoned]
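The caching flow described above can be condensed into a short sketch. The following is a minimal, self-contained C++ illustration of the pointer-to-name mapping, the concatenated cache key, and the reference counting; aside from look_up_or_insert, which the commit message itself names, every class, method, and field here is a hypothetical stand-in for the real ExecuTorch/XNNPACK types, not the actual API:

// Sketch only: illustrative stand-ins, not the ExecuTorch/XNNPACK API.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

class WeightsCacheSketch {
 public:
  // Before runtime creation: load an unpacked tensor from the named data
  // map and remember which pointer corresponds to which name.
  const void* load_unpacked(const std::string& name,
                            std::vector<uint8_t> bytes) {
    unpacked_[name] = std::move(bytes);
    const void* ptr = unpacked_[name].data();
    name_of_ptr_[ptr] = name;
    return ptr;
  }

  // During runtime creation: XNNPACK hands back the unpacked pointers it is
  // packing. Their names are concatenated (e.g. "weights" + "bias" ->
  // "weightsbias") to form the cache key; a hit reuses the packed buffer,
  // a miss stores the freshly packed one.
  const void* look_up_or_insert(const std::vector<const void*>& unpacked_ptrs,
                                std::vector<uint8_t> packed) {
    std::string key;
    for (const void* p : unpacked_ptrs) {
      key += name_of_ptr_.at(p);
    }
    auto it = packed_.find(key);
    if (it == packed_.end()) {
      it = packed_.emplace(key, Entry{std::move(packed), 0}).first;
    }
    ++it->second.ref_count;  // this runtime now holds a reference
    return it->second.bytes.data();
  }

  // After runtime creation: unpacked buffers are no longer needed.
  void finalize() {
    unpacked_.clear();
    name_of_ptr_.clear();
    is_finalized_ = true;
  }

  // On executor destruction: drop one reference and free the packed buffer
  // once no runtime uses it anymore.
  void release(const std::string& key) {
    auto it = packed_.find(key);
    if (it != packed_.end() && --it->second.ref_count == 0) {
      packed_.erase(it);
    }
  }

 private:
  struct Entry {
    std::vector<uint8_t> bytes;
    std::size_t ref_count;
  };
  std::map<std::string, std::vector<uint8_t>> unpacked_;
  std::unordered_map<const void*, std::string> name_of_ptr_;
  std::map<std::string, Entry> packed_;
  bool is_finalized_ = false;
};

int main() {
  WeightsCacheSketch cache;
  const void* w = cache.load_unpacked("weights", {1, 2, 3});
  const void* b = cache.load_unpacked("bias", {4});
  const void* packed = cache.look_up_or_insert({w, b}, {9, 9, 9, 9});
  cache.finalize();  // unpacked buffers freed; packed entry survives
  std::cout << "packed at " << packed << "\n";
  cache.release("weightsbias");  // last reference dropped: entry destroyed
}

A second runtime packing the same "weights"/"bias" pair would hit the "weightsbias" entry and skip packing entirely; the packed buffer is freed only when the last executor referencing it is destroyed.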
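The mutex hierarchy in the final paragraph just means the two locks are always acquired in a fixed order, which rules out deadlock between threads that need both. A minimal sketch, with the two member names taken from the commit message and the surrounding class hypothetical:

// Sketch of the lock ordering only; the backend class is hypothetical.
#include <mutex>

class XnnpackBackendSketch {
 public:
  void create_runtime_and_pack_weights() {
    // Always acquire in hierarchy order: workspace first, weights cache second.
    std::lock_guard<std::mutex> workspace_guard(workspace_mutex_);
    std::lock_guard<std::mutex> cache_guard(weights_cache_mutex_);
    // ... initialize the cache, create the runtime, finalize the cache ...
  }

 private:
  std::mutex workspace_mutex_;
  std::mutex weights_cache_mutex_;
};

int main() {
  XnnpackBackendSketch backend;
  backend.create_runtime_and_pack_weights();
}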
2 parents f7194af + e86c9c9 commit 03fda11

File tree

124 files changed (+2790, -1075 lines)


.ci/scripts/unittest-buck2.sh

Lines changed: 4 additions & 2 deletions
@@ -17,8 +17,10 @@ buck2 query "//backends/apple/... + //backends/example/... + \
   //kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
   //kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
 
+UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
+BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
 # TODO: expand the covered scope of Buck targets.
 # //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
 # //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
-buck2 build //runtime/backend/... //runtime/core/... //runtime/executor: //runtime/kernel/... //runtime/platform/...
-buck2 test //runtime/backend/... //runtime/core/... //runtime/executor: //runtime/kernel/... //runtime/platform/...
+buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... //runtime/backend/... //runtime/core/... \
+  //runtime/executor: //runtime/kernel/... //runtime/platform/...

.lintrunner.toml

Lines changed: 2 additions & 0 deletions
@@ -218,6 +218,8 @@ exclude_patterns = [
     'examples/**',
     'extension/**',
     'kernels/optimized/**',
+    # Justified <functional> include.
+    'runtime/kernel/thread_parallel_interface.h',
     'scripts/**',
     'third-party/**',
     'util/**',

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
@@ -751,7 +751,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
    AND EXECUTORCH_BUILD_CPUINFO
 )
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
 endif()
 
 if(EXECUTORCH_BUILD_PYBIND)

CODEOWNERS

Lines changed: 12 additions & 12 deletions
@@ -52,31 +52,31 @@
 /extension/export_util @kimishpatel
 /extension/flat_tensor @lucylq
 /extension/gguf_util @larryliu0820
-/extension/kernel_util @kimishpatel @manuelcandales
-/extension/llm @jackzhxng @iseeyuan @larryliu0820
-/extension/memory_allocator @JacobSzwejbka
+/extension/kernel_util @kimishpatel @manuelcandales @swolchok
+/extension/llm @jackzhxng @iseeyuan @larryliu0820 @swolchok
+/extension/memory_allocator @JacobSzwejbka @swolchok
 /extension/module @shoumikhin
-/extension/parallel @kimishpatel
+/extension/parallel @kimishpatel @swolchok
 /extension/pybindings @JacobSzwejbka @larryliu0820
-/extension/pytree @JacobSzwejbka
-# /extension/runner_util @dbort
+/extension/pytree @JacobSzwejbka @swolchok
+/extension/runner_util @swolchok
 /extension/tensor @shoumikhin
-# /extension/testing_util @dbort
-/extension/threadpool @kimishpatel
+/extension/testing_util @swolchok
+/extension/threadpool @kimishpatel @swolchok
 /extension/training @JacobSzwejbka
 
-/kernels @manuelcandales
+/kernels @manuelcandales @swolchok
 
 /profiler @tarun292 @Gasoonjia
 
-/runtime @JacobSzwejbka @lucylq
+/runtime @JacobSzwejbka @lucylq @swolchok
 /runtime/backend @cccclai
 
 /schema @JacobSzwejbka @lucylq
 
-/scripts @GregoryComer
+/scripts @GregoryComer @swolchok
 
-/shim @larryliu0820 @GregoryComer
+/shim @larryliu0820 @GregoryComer @swolchok
 
 /third-party @GregoryComer
 
Test.cmake

Lines changed: 0 additions & 1 deletion
@@ -13,7 +13,6 @@ if(BUILD_TESTING)
   add_subdirectory(extension/evalue_util/test)
   add_subdirectory(extension/kernel_util/test)
   add_subdirectory(extension/memory_allocator/test)
-  add_subdirectory(extension/parallel/test)
   add_subdirectory(extension/pytree/test)
   add_subdirectory(kernels/portable/cpu/util/test)
   add_subdirectory(kernels/prim_ops/test)

backends/arm/test/conftest.py

Lines changed: 4 additions & 1 deletion
@@ -32,7 +32,10 @@ def pytest_configure(config):
     pytest._test_options = {}  # type: ignore[attr-defined]
     pytest._test_options["corstone_fvp"] = False  # type: ignore[attr-defined]
 
-    if config.option.arm_run_corstoneFVP:
+    if (
+        getattr(config.option, "arm_run_corstoneFVP", False)
+        and config.option.arm_run_corstoneFVP
+    ):
         corstone300_exists = shutil.which("FVP_Corstone_SSE-300_Ethos-U55")
         corstone320_exists = shutil.which("FVP_Corstone_SSE-320")
         if not (corstone300_exists and corstone320_exists):

backends/cadence/aot/TARGETS

Lines changed: 13 additions & 1 deletion
@@ -115,11 +115,23 @@ python_library(
     ],
     deps = [
         "fbcode//caffe2:torch",
-        "fbcode//executorch/exir:scalar_type",
         "fbcode//executorch/backends/cadence/aot:utils",
     ],
 )
 
+python_library(
+    name = "ref_implementations",
+    srcs = [
+        "ref_implementations.py",
+    ],
+    typing = True,
+    deps = [
+        "fbcode//caffe2:torch",
+        "fbcode//executorch/exir:scalar_type",
+    ],
+)
+
+
 export_file(name = "functions.yaml")
 
 executorch_generated_lib(

backends/cadence/aot/compiler.py

Lines changed: 2 additions & 0 deletions
@@ -198,6 +198,8 @@ def export_to_edge(
         _skip_dim_order=True,
         # Allow specific non-core aten ops in the IR.
         _core_aten_ops_exception_list=[
+            torch.ops.aten._linalg_det.default,
+            torch.ops.aten._linalg_svd.default,
             torch.ops.aten._native_batch_norm_legit_functional.default,
             torch.ops.aten.linear.default,
             torch.ops.aten.linalg_vector_norm.default,

backends/cadence/aot/export_example.py

Lines changed: 4 additions & 0 deletions
@@ -38,6 +38,8 @@ def export_model(
     example_inputs: Tuple[Any, ...],
     file_name: str = "CadenceDemoModel",
     run_and_compare: bool = True,
+    eps_error: float = 1e-1,
+    eps_warn: float = 1e-5,
 ):
     # create work directory for outputs and model binary
     working_dir = tempfile.mkdtemp(dir="/tmp")
@@ -89,4 +91,6 @@ def export_model(
         inputs=example_inputs,
         ref_outputs=ref_outputs,
         working_dir=working_dir,
+        eps_error=eps_error,
+        eps_warn=eps_warn,
     )

backends/cadence/aot/functions.yaml

Lines changed: 5 additions & 0 deletions
@@ -248,3 +248,8 @@
   kernels:
     - arg_meta: null
       kernel_name: impl::reference::quantized_fully_connected_per_tensor_out
+
+- func: cadence::requantize.out(Tensor input, Tensor in_scale, Tensor in_zero_point, Tensor out_scale, Tensor out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::requantize_out
