diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml
index 60d3ac71451..1430ea3a9ef 100644
--- a/build/cmake_deps.toml
+++ b/build/cmake_deps.toml
@@ -116,6 +116,20 @@ deps = [
   "executorch",
 ]
 
+[targets.optimized_native_cpu_ops_oss]
+buck_targets = [
+  "//configurations:optimized_native_cpu_ops_oss",
+]
+filters = [
+  ".cpp$",
+]
+excludes = [
+]
+deps = [
+  "executorch_no_prim_ops",
+  "executorch",
+  "portable_kernels",
+]
 # ---------------------------------- core end ----------------------------------
 # ---------------------------------- extension start ----------------------------------
 [targets.extension_data_loader]
@@ -341,5 +355,6 @@ deps = [
   "portable_kernels",
   "quantized_kernels",
   "xnnpack_backend",
+  "optimized_native_cpu_ops_oss",
 ]
 # ---------------------------------- LLama end ----------------------------------
diff --git a/configurations/targets.bzl b/configurations/targets.bzl
index 60a09d36269..6a5341c2904 100644
--- a/configurations/targets.bzl
+++ b/configurations/targets.bzl
@@ -28,7 +28,7 @@ def define_common_targets():
         ],
     )
 
-    # Add a commong configuration of cpu optimized operators. This adds a bit of confusion
+    # Add a common configuration of cpu optimized operators. This adds a bit of confusion
     # with the above executorch_cpu_optimized target. Generally it would make sense
     # to just add optimized operators to that target but because executorch_cpu_optimized
     # might be used elsewhere, I dont want to include ops in that target and find out
@@ -50,3 +50,21 @@ def define_common_targets():
             "@EXECUTORCH_CLIENTS",
         ],
     )
+
+    # TODO(T183193812): delete this target after optimized-oss.yaml is gone
+    executorch_generated_lib(
+        name = "optimized_native_cpu_ops_oss",
+        deps = [
+            "//executorch/kernels/optimized:optimized_operators",
+            "//executorch/kernels/optimized:optimized_oplist",
+            "//executorch/kernels/portable:executorch_aten_ops",
+            "//executorch/kernels/portable:operators",
+        ],
+        functions_yaml_target = "//executorch/kernels/optimized:optimized-oss.yaml",
+        fallback_yaml_target = "//executorch/kernels/portable:functions.yaml",
+        define_static_targets = True,
+        visibility = [
+            "//executorch/examples/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
diff --git a/examples/models/llama2/runner/targets.bzl b/examples/models/llama2/runner/targets.bzl
index 9ee3f99567d..96d47ffce21 100644
--- a/examples/models/llama2/runner/targets.bzl
+++ b/examples/models/llama2/runner/targets.bzl
@@ -4,7 +4,8 @@ def _get_operator_lib(aten = False):
     if aten:
         return ["//executorch/kernels/aten:generated_lib"]
     elif runtime.is_oss:
-        return ["//executorch/kernels/portable:generated_lib", "//executorch/extension/llm/custom_ops:custom_ops"]
+        # TODO(T183193812): delete this branch once optimized-oss.yaml is gone.
+        return ["//executorch/configurations:optimized_native_cpu_ops_oss", "//executorch/extension/llm/custom_ops:custom_ops"]
     else:
         return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"]
 
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
index 4237ae7b3a7..338d997297d 100644
--- a/extension/llm/export/builder.py
+++ b/extension/llm/export/builder.py
@@ -16,6 +16,7 @@
 from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
     DuplicateDynamicQuantChainPass,
 )
+from executorch.backends.xnnpack.passes.convert_to_linear import ConvertToLinearPass
 from executorch.exir import EdgeProgramManager
 from executorch.exir.backend.partitioner import Partitioner
 
@@ -382,6 +383,10 @@ def to_executorch(self) -> "LLMEdgeManager":
             ExecutorchBackendConfig(
                 extract_delegate_segments=True,
                 passes=[
+                    # If any linear operations are left in the graph, execute them
+                    # with the optimized op_linear instead of materializing a
+                    # transpose followed by a regular op_mm.
+                    ConvertToLinearPass(),
                     QuantFusionPass(),
                 ],
                 memory_planning_pass=MemoryPlanningPass(
diff --git a/kernels/optimized/targets.bzl b/kernels/optimized/targets.bzl
index c06a1dc079d..88afe5011d3 100644
--- a/kernels/optimized/targets.bzl
+++ b/kernels/optimized/targets.bzl
@@ -19,6 +19,14 @@ def define_common_targets():
         ],
     )
 
+    runtime.export_file(
+        name = "optimized-oss.yaml",
+        visibility = [
+            "//executorch/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
     runtime.cxx_library(
         name = "optimized_operators",
         srcs = [],