From 72fb9b721c4be46097bad3ec4bfd574e57f27e89 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 27 May 2025 12:30:31 -0700 Subject: [PATCH 1/9] Update [ghstack-poisoned] --- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- install_requirements.py | 2 +- runtime/core/portable_type/c10/c10/macros/Macros.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 75a95d0522b..306a7bf0a4a 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -01f1cc44cbbfdf6307aa01b803a4ee22f9ade946 +b40585022f80385c0bbf5c0d08c172c391ed2318 diff --git a/install_requirements.py b/install_requirements.py index 2fcd65ea338..8f8ad106c5d 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -71,7 +71,7 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250422" +NIGHTLY_VERSION = "dev20250527" def install_requirements(use_pytorch_nightly): diff --git a/runtime/core/portable_type/c10/c10/macros/Macros.h b/runtime/core/portable_type/c10/c10/macros/Macros.h index 7e61ad7e26b..0947be6c0d0 100644 --- a/runtime/core/portable_type/c10/c10/macros/Macros.h +++ b/runtime/core/portable_type/c10/c10/macros/Macros.h @@ -241,7 +241,7 @@ using namespace c10::xpu; #ifdef __HIPCC__ // Unlike CUDA, HIP requires a HIP header to be included for __host__ to work. // We do this #include here so that C10_HOST_DEVICE and friends will Just Work. 
-// See https://github.com/ROCm-Developer-Tools/HIP/issues/441 +// See https://github.com/ROCm/hip/issues/441 #include #endif @@ -286,7 +286,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \ ((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \ ? (blocks_per_sm) \ - : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block)-1) / \ + : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / \ (threads_per_block)))) // C10_LAUNCH_BOUNDS is analogous to __launch_bounds__ #define C10_LAUNCH_BOUNDS_0 \ From 0b369e7745149dceae83728b15716a788f705b1b Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 27 May 2025 12:40:27 -0700 Subject: [PATCH 2/9] 05-27 doesn't seem to be working, try 05-26 [ghstack-poisoned] --- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- install_requirements.py | 2 +- runtime/core/portable_type/c10/c10/util/BFloat16-inl.h | 5 +---- runtime/core/portable_type/c10/c10/util/BFloat16.h | 5 +---- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 306a7bf0a4a..32d0140c45e 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -b40585022f80385c0bbf5c0d08c172c391ed2318 +8c16d0e4047a8ac5885baf52e8779fb3e36f2987 diff --git a/install_requirements.py b/install_requirements.py index 8f8ad106c5d..b9b6c72142f 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -71,7 +71,7 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250527" +NIGHTLY_VERSION = "dev20250526" def install_requirements(use_pytorch_nightly): diff --git a/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h 
b/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h index 10ab0c828d7..1ed866f78d9 100644 --- a/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +++ b/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h @@ -10,14 +10,11 @@ C10_CLANG_DIAGNOSTIC_PUSH() C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #endif -#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) #if defined(CL_SYCL_LANGUAGE_VERSION) #include // for SYCL 1.2.1 -#else +#elif defined(SYCL_LANGUAGE_VERSION) #include // for SYCL 2020 #endif -#include -#endif namespace c10 { diff --git a/runtime/core/portable_type/c10/c10/util/BFloat16.h b/runtime/core/portable_type/c10/c10/util/BFloat16.h index 93d0ec54fb0..0f7cecda46b 100644 --- a/runtime/core/portable_type/c10/c10/util/BFloat16.h +++ b/runtime/core/portable_type/c10/c10/util/BFloat16.h @@ -14,14 +14,11 @@ #include #endif -#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) #if defined(CL_SYCL_LANGUAGE_VERSION) #include // for SYCL 1.2.1 -#else +#elif defined(SYCL_LANGUAGE_VERSION) #include // for SYCL 2020 #endif -#include -#endif namespace c10 { From d57b556fe414e80c673461dc895ac5412fad3810 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 27 May 2025 12:56:45 -0700 Subject: [PATCH 3/9] Update [ghstack-poisoned] --- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- install_requirements.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 32d0140c45e..b7ce7c5ce7e 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -8c16d0e4047a8ac5885baf52e8779fb3e36f2987 +53ecb8159aa28b3c015917acaa89604cfae0d2c6 diff --git a/install_requirements.py b/install_requirements.py index b9b6c72142f..31b843c83a7 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -71,7 +71,7 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in 
.ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250526" +NIGHTLY_VERSION = "dev20250525" def install_requirements(use_pytorch_nightly): From 0be8b6eebf1c6eed63c9fd389160faf1f8f71b92 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 27 May 2025 21:07:21 -0700 Subject: [PATCH 4/9] update typing-extensions version in buck [ghstack-poisoned] --- pyproject.toml | 1 + third-party/TARGETS | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 70fbbea18e5..7faa2c53304 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ dependencies=[ "ruamel.yaml", "sympy", "tabulate", + # See also third-party/TARGETS for buck's typing-extensions version. "typing-extensions>=4.10.0", # Keep this version in sync with: ./backends/apple/coreml/scripts/install_requirements.sh "coremltools==8.3; platform_system == 'Darwin'", diff --git a/third-party/TARGETS b/third-party/TARGETS index 0ec62c1536f..c80bd9448b3 100644 --- a/third-party/TARGETS +++ b/third-party/TARGETS @@ -23,9 +23,9 @@ prebuilt_python_library_defs = { "url": "https://files.pythonhosted.org/packages/12/fc/a4d5a7554e0067677823f7265cb3ae22aed8a238560b5133b58cda252dad/PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", }, "typing-extensions": { - "out": "typing_extensions-4.2.0-py3-none-any.whl", - "sha1": "ff0849420e94f425818bff5d0f25e3cdfaba8601", - "url": "https://files.pythonhosted.org/packages/75/e1/932e06004039dd670c9d5e1df0cd606bf46e29a28e65d5bb28e894ea29c9/typing_extensions-4.2.0-py3-none-any.whl", + "out": "typing_extensions-4.13.2-py3-none-any.whl", + "sha1": "85a14b4d38ca0e528328b6b591769e1d989f12b8", + "url": "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", }, "wcwidth": { 
"out": "wcwidth-0.1.5-py2.py3-none-any.whl", From d9f3957a84feea51a6db177bee81b013e261121f Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 3 Jun 2025 11:00:13 -0700 Subject: [PATCH 5/9] Update [ghstack-poisoned] --- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- install_requirements.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 75a95d0522b..40a61733e8d 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -01f1cc44cbbfdf6307aa01b803a4ee22f9ade946 +64247892a0ca8ed045ad0b530eb87c3dd66590ea diff --git a/install_requirements.py b/install_requirements.py index 2fcd65ea338..31ce482b317 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -71,7 +71,7 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250422" +NIGHTLY_VERSION = "dev20250602" def install_requirements(use_pytorch_nightly): From 845b14d09bfd028f9923094ec25931a47b08da39 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 3 Jun 2025 11:10:10 -0700 Subject: [PATCH 6/9] Update [ghstack-poisoned] --- install_requirements.py | 2 +- runtime/core/portable_type/c10/c10/macros/Macros.h | 4 ++-- runtime/core/portable_type/c10/c10/util/BFloat16-inl.h | 5 +---- runtime/core/portable_type/c10/c10/util/BFloat16.h | 5 +---- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/install_requirements.py b/install_requirements.py index 31ce482b317..dcbf7e160ec 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -82,7 +82,7 @@ def install_requirements(use_pytorch_nightly): # been installed on CI before this step, so pip won't reinstall them f"torch==2.8.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torch", ( - 
f"torchvision==0.22.0.{NIGHTLY_VERSION}" + f"torchvision==0.23.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchvision" ), # For testing. diff --git a/runtime/core/portable_type/c10/c10/macros/Macros.h b/runtime/core/portable_type/c10/c10/macros/Macros.h index 7e61ad7e26b..0947be6c0d0 100644 --- a/runtime/core/portable_type/c10/c10/macros/Macros.h +++ b/runtime/core/portable_type/c10/c10/macros/Macros.h @@ -241,7 +241,7 @@ using namespace c10::xpu; #ifdef __HIPCC__ // Unlike CUDA, HIP requires a HIP header to be included for __host__ to work. // We do this #include here so that C10_HOST_DEVICE and friends will Just Work. -// See https://github.com/ROCm-Developer-Tools/HIP/issues/441 +// See https://github.com/ROCm/hip/issues/441 #include #endif @@ -286,7 +286,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \ ((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \ ? (blocks_per_sm) \ - : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block)-1) / \ + : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / \ (threads_per_block)))) // C10_LAUNCH_BOUNDS is analogous to __launch_bounds__ #define C10_LAUNCH_BOUNDS_0 \ diff --git a/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h b/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h index 10ab0c828d7..1ed866f78d9 100644 --- a/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +++ b/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h @@ -10,14 +10,11 @@ C10_CLANG_DIAGNOSTIC_PUSH() C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") #endif -#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) #if defined(CL_SYCL_LANGUAGE_VERSION) #include // for SYCL 1.2.1 -#else +#elif defined(SYCL_LANGUAGE_VERSION) #include // for SYCL 2020 #endif -#include -#endif namespace c10 { diff --git a/runtime/core/portable_type/c10/c10/util/BFloat16.h b/runtime/core/portable_type/c10/c10/util/BFloat16.h index 
93d0ec54fb0..0f7cecda46b 100644 --- a/runtime/core/portable_type/c10/c10/util/BFloat16.h +++ b/runtime/core/portable_type/c10/c10/util/BFloat16.h @@ -14,14 +14,11 @@ #include #endif -#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) #if defined(CL_SYCL_LANGUAGE_VERSION) #include // for SYCL 1.2.1 -#else +#elif defined(SYCL_LANGUAGE_VERSION) #include // for SYCL 2020 #endif -#include -#endif namespace c10 { From 20407f0e6701aa54dea329b49ec03dad9e6af361 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 3 Jun 2025 11:17:28 -0700 Subject: [PATCH 7/9] Update [ghstack-poisoned] --- install_requirements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install_requirements.py b/install_requirements.py index dcbf7e160ec..38188d08300 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -89,7 +89,7 @@ def install_requirements(use_pytorch_nightly): ] EXAMPLES_REQUIREMENTS = [ - f"torchaudio==2.6.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchaudio", + f"torchaudio==2.8.0.{NIGHTLY_VERSION}" if use_pytorch_nightly else "torchaudio", ] # Assemble the list of requirements to actually install. 
From 5f85bcad58f16b72c714ecd7cbd4d9ea79a253cb Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 3 Jun 2025 12:24:19 -0700 Subject: [PATCH 8/9] Update [ghstack-poisoned] --- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 40a61733e8d..6982ba9c780 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -64247892a0ca8ed045ad0b530eb87c3dd66590ea +0d0058d90de410cbc998089eb5e475776d2ad55d From 3a2516a7df6046d41bd156ecd883b0caa1b31f68 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 3 Jun 2025 14:12:32 -0700 Subject: [PATCH 9/9] Update [ghstack-poisoned] --- .Package.swift/kernels_portable/dummy.swift | 0 .../kernels_portable_debug/dummy.swift | 0 .ci/docker/ci_commit_pins/pytorch.txt | 2 +- .ci/scripts/test_ios_ci.sh | 4 - .ci/scripts/test_model.sh | 42 +- .github/scripts/label_utils.py | 4 +- .github/scripts/trymerge.py | 10 +- .github/workflows/apple-perf.yml | 9 - .github/workflows/apple.yml | 9 - .github/workflows/check-labels.yml | 2 +- .github/workflows/trunk.yml | 10 +- CMakeLists.txt | 115 +++-- CMakePresets.json | 6 +- Package.swift | 94 ++-- .../delegate/ETCoreMLDefaultModelExecutor.mm | 2 +- .../runtime/delegate/ETCoreMLModelManager.mm | 4 +- backends/apple/mps/install_requirements.sh | 8 - backends/apple/mps/setup.md | 8 +- backends/arm/_passes/__init__.py | 1 + backends/arm/_passes/arm_pass_manager.py | 3 + .../arm/_passes/decompose_groupnorm_pass.py | 208 ++++++++ .../arm/_passes/decompose_layernorm_pass.py | 8 +- .../tosa_supported_operators.py | 2 + backends/arm/operators/op_conv2d.py | 12 +- backends/arm/operators/op_view.py | 8 +- backends/arm/scripts/parse_test_names.py | 1 + backends/arm/scripts/pre-push | 4 +- backends/arm/test/ops/test_group_norm.py | 145 ++++++ .../arm/test/ops/test_linalg_vector_norm.py | 6 +- backends/cadence/aot/TARGETS | 3 + 
backends/cadence/aot/memory_constraints.py | 24 +- backends/cadence/aot/ops_registrations.py | 33 ++ backends/cadence/aot/replace_ops.py | 16 +- .../cadence/aot/tests/test_memory_passes.py | 464 ++++++++++++------ .../aot/tests/test_replace_ops_passes.py | 40 +- backends/cadence/utils/facto_util.py | 54 +- backends/qualcomm/builders/op_avg_pool2d.py | 47 +- backends/qualcomm/quantizer/annotators.py | 1 + backends/qualcomm/tests/models.py | 17 +- backends/qualcomm/tests/test_qnn_delegate.py | 135 ++++- backends/qualcomm/utils/utils.py | 52 +- .../_passes/squeeze_unsqueeze_inputs.py | 8 +- backends/vulkan/runtime/VulkanBackend.cpp | 7 + backends/vulkan/runtime/gen_vulkan_spv.py | 18 +- .../vulkan/runtime/graph/ComputeGraph.cpp | 22 +- backends/vulkan/runtime/graph/ComputeGraph.h | 27 +- .../vulkan/runtime/graph/ops/DispatchNode.cpp | 26 +- .../vulkan/runtime/graph/ops/DispatchNode.h | 6 + .../runtime/graph/ops/DynamicDispatchNode.cpp | 58 ++- .../runtime/graph/ops/DynamicDispatchNode.h | 15 + .../vulkan/runtime/graph/ops/ExecuteNode.h | 2 +- .../graph/ops/glsl/buffer_to_nchw.yaml | 1 + .../graph/ops/glsl/conv2d_dw_output_tile.glsl | 28 +- .../runtime/graph/ops/glsl/conv2d_pw.glsl | 4 +- .../graph/ops/glsl/conv2d_pw_s1p0.glsl | 35 +- .../runtime/graph/ops/glsl/image_to_nchw.yaml | 1 + .../nchw_to_bitw8_image_nobitw8buffer.glsl | 8 +- .../nchw_to_bitw8_image_nobitw8buffer.yaml | 3 + .../graph/ops/glsl/nchw_to_buffer.glsl | 14 +- .../graph/ops/glsl/nchw_to_buffer.yaml | 4 + .../runtime/graph/ops/glsl/nchw_to_image.glsl | 14 +- .../runtime/graph/ops/glsl/nchw_to_image.yaml | 10 + .../runtime/graph/ops/glsl/select.glslh | 74 +++ .../graph/ops/glsl/select_batch_4d.glsl | 52 -- .../graph/ops/glsl/select_channel_3d.glsl | 50 -- .../graph/ops/glsl/select_channel_4d.glsl | 65 --- .../graph/ops/glsl/select_height_3d.glsl | 62 --- .../graph/ops/glsl/select_height_3d.yaml | 10 - .../graph/ops/glsl/select_height_4d.glsl | 64 --- .../graph/ops/glsl/select_height_4d.yaml | 10 - 
.../graph/ops/glsl/select_width_3d.glsl | 63 --- .../graph/ops/glsl/select_width_3d.yaml | 10 - .../graph/ops/glsl/select_width_4d.glsl | 67 --- .../graph/ops/glsl/select_width_4d.yaml | 10 - .../vulkan/runtime/graph/ops/glsl/slice.glslh | 53 ++ .../graph/ops/glsl/slice_packed_dim.glsl | 67 --- .../graph/ops/glsl/slice_packed_dim.yaml | 11 - .../graph/ops/glsl/slice_unpacked_dim.glsl | 68 --- .../graph/ops/glsl/slice_unpacked_dim.yaml | 10 - .../graph/ops/glsl/transfer_buffer.glsl | 58 +++ ...t_channel_3d.yaml => transfer_buffer.yaml} | 9 +- .../graph/ops/glsl/transfer_texture.glsl | 83 ++++ ..._channel_4d.yaml => transfer_texture.yaml} | 9 +- .../vulkan/runtime/graph/ops/glsl/where.glsl | 111 +++++ .../glsl/{select_batch_4d.yaml => where.yaml} | 8 +- .../vulkan/runtime/graph/ops/impl/Clone.cpp | 4 +- .../vulkan/runtime/graph/ops/impl/Common.cpp | 33 ++ .../vulkan/runtime/graph/ops/impl/Common.h | 47 ++ .../runtime/graph/ops/impl/Convolution.cpp | 5 +- .../vulkan/runtime/graph/ops/impl/Select.cpp | 193 ++++---- .../vulkan/runtime/graph/ops/impl/Slice.cpp | 230 ++++----- .../vulkan/runtime/graph/ops/impl/Staging.cpp | 33 +- .../runtime/graph/ops/impl/Transfer.cpp | 114 +++++ .../vulkan/runtime/graph/ops/impl/Transfer.h | 40 ++ .../vulkan/runtime/graph/ops/impl/Where.cpp | 126 +++++ .../graph/ops/utils/ShaderNameUtils.cpp | 1 + .../runtime/graph/ops/utils/StagingUtils.cpp | 12 +- .../runtime/graph/ops/utils/StagingUtils.h | 3 +- backends/vulkan/runtime/vk_api/Types.h | 2 +- backends/vulkan/test/op_tests/cases.py | 34 +- .../op_tests/utils/gen_correctness_base.py | 8 +- .../test/op_tests/utils/gen_correctness_vk.py | 2 + backends/vulkan/test/test_vulkan_delegate.py | 47 ++ backends/vulkan/test/utils/test_utils.cpp | 6 +- .../vulkan/test/vulkan_compute_api_test.cpp | 22 +- backends/xnnpack/CMakeLists.txt | 46 +- backends/xnnpack/README.md | 6 +- backends/xnnpack/operators/node_visitor.py | 37 +- backends/xnnpack/runtime/XNNCompiler.cpp | 43 +- 
.../xnnpack/serialization/runtime_schema.fbs | 6 +- backends/xnnpack/serialization/schema.fbs | 6 +- .../serialization/xnnpack_graph_schema.py | 10 + backends/xnnpack/third-party/cpuinfo | 2 +- codegen/api/et_cpp.py | 18 +- codegen/api/types/__init__.py | 4 +- codegen/api/types/signatures.py | 5 +- codegen/gen.py | 35 +- codegen/test/test_executorch_custom_ops.py | 2 +- codegen/test/test_executorch_gen.py | 8 +- codegen/tools/gen_oplist.py | 1 + devtools/etrecord/_etrecord.py | 35 ++ devtools/etrecord/tests/etrecord_test.py | 19 +- .../_intermediate_output_capturer.py | 39 +- .../intermediate_output_capturer_test.py | 2 - docs/source/_static/img/swiftpm_xcode2.png | Bin 60796 -> 55550 bytes .../backend-delegates-xnnpack-reference.md | 2 +- docs/source/backends-mps.md | 8 +- .../tutorial-xnnpack-delegate-lowering.md | 10 +- .../using-executorch-building-from-source.md | 20 +- docs/source/using-executorch-ios.md | 10 +- examples/arm/aot_arm_compiler.py | 10 +- .../LLaMA/LLaMA.xcodeproj/project.pbxproj | 2 +- .../LLaMA/docs/delegates/mps_README.md | 1 - .../demo-apps/react-native/rnllama/README.md | 2 +- .../ios/rnllama.xcodeproj/project.pbxproj | 2 +- examples/models/llama/README.md | 18 +- .../llama/source_transformation/quantize.py | 58 +-- examples/models/phi-3-mini/README.md | 2 +- examples/models/qwen3/README.md | 2 +- examples/qualcomm/oss_scripts/deit.py | 148 ++++++ examples/qualcomm/oss_scripts/efficientnet.py | 145 ++++++ .../oss_scripts/llama/runner/runner.cpp | 6 +- examples/xnnpack/README.md | 10 +- exir/passes/constant_prop_pass.py | 37 +- exir/tests/test_passes.py | 28 ++ export/TARGETS | 1 + export/export.py | 67 ++- export/recipe.py | 9 +- .../LlmModuleInstrumentationTest.kt | 32 +- .../org/pytorch/executorch/ModuleE2ETest.kt | 7 +- .../executorch/ModuleInstrumentationTest.kt | 8 +- .../org/pytorch/executorch/TestFileUtils.kt | 16 + .../Exported/ExecuTorch+Tensor.swift | 4 +- .../ExecuTorch/Exported/ExecuTorchTensor.h | 4 +- 
.../ExecuTorch/__tests__/TensorTest.swift | 305 +++++------- .../Benchmark.xcodeproj/project.pbxproj | 6 +- extension/llm/tokenizers | 2 +- extension/pybindings/README.md | 20 +- extension/threadpool/cpuinfo_utils.cpp | 26 + install_executorch.py | 88 +--- install_requirements.py | 2 +- kernels/aten/functions.yaml | 4 + kernels/optimized/cpu/op_linear.cpp | 134 ++++- kernels/portable/cpu/op_rand.cpp | 50 ++ kernels/portable/cpu/op_randn.cpp | 50 ++ kernels/portable/functions.yaml | 12 + kernels/test/CMakeLists.txt | 2 + kernels/test/op_linear_test.cpp | 98 +++- kernels/test/op_rand_test.cpp | 95 ++++ kernels/test/op_randn_test.cpp | 93 ++++ kernels/test/targets.bzl | 2 + pytest.ini | 2 + runtime/backend/backend_init_context.h | 8 +- .../core/portable_type/c10/c10/targets.bzl | 2 - scripts/build_apple_frameworks.sh | 9 - scripts/test_ios.sh | 4 - .../kernels/portable/op_registration_util.bzl | 16 + third-party/TARGETS | 12 - tools/cmake/Codegen.cmake | 3 +- 179 files changed, 3960 insertions(+), 1988 deletions(-) delete mode 100644 .Package.swift/kernels_portable/dummy.swift delete mode 100644 .Package.swift/kernels_portable_debug/dummy.swift delete mode 100755 backends/apple/mps/install_requirements.sh create mode 100644 backends/arm/_passes/decompose_groupnorm_pass.py create mode 100644 backends/arm/test/ops/test_group_norm.py create mode 100644 backends/vulkan/runtime/graph/ops/glsl/select.glslh delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_height_3d.yaml delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_height_4d.yaml delete mode 
100644 backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_width_3d.yaml delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/select_width_4d.yaml create mode 100644 backends/vulkan/runtime/graph/ops/glsl/slice.glslh delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl delete mode 100644 backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml create mode 100644 backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl rename backends/vulkan/runtime/graph/ops/glsl/{select_channel_3d.yaml => transfer_buffer.yaml} (54%) create mode 100644 backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl rename backends/vulkan/runtime/graph/ops/glsl/{select_channel_4d.yaml => transfer_texture.yaml} (52%) create mode 100644 backends/vulkan/runtime/graph/ops/glsl/where.glsl rename backends/vulkan/runtime/graph/ops/glsl/{select_batch_4d.yaml => where.yaml} (64%) create mode 100644 backends/vulkan/runtime/graph/ops/impl/Common.cpp create mode 100644 backends/vulkan/runtime/graph/ops/impl/Common.h create mode 100644 backends/vulkan/runtime/graph/ops/impl/Transfer.cpp create mode 100644 backends/vulkan/runtime/graph/ops/impl/Transfer.h create mode 100644 backends/vulkan/runtime/graph/ops/impl/Where.cpp create mode 100644 examples/qualcomm/oss_scripts/deit.py create mode 100644 examples/qualcomm/oss_scripts/efficientnet.py create mode 100644 extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/TestFileUtils.kt create mode 100644 kernels/portable/cpu/op_rand.cpp create mode 100644 kernels/portable/cpu/op_randn.cpp create mode 100644 kernels/test/op_rand_test.cpp create mode 100644 
kernels/test/op_randn_test.cpp diff --git a/.Package.swift/kernels_portable/dummy.swift b/.Package.swift/kernels_portable/dummy.swift deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/.Package.swift/kernels_portable_debug/dummy.swift b/.Package.swift/kernels_portable_debug/dummy.swift deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 6982ba9c780..bb8caf3ffeb 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -0d0058d90de410cbc998089eb5e475776d2ad55d +5616fa4a68718ead203314a3467f7dd9547153ae diff --git a/.ci/scripts/test_ios_ci.sh b/.ci/scripts/test_ios_ci.sh index 16f2e16de50..6908d61483c 100755 --- a/.ci/scripts/test_ios_ci.sh +++ b/.ci/scripts/test_ios_ci.sh @@ -42,10 +42,6 @@ say "Installing CoreML Backend Requirements" ./backends/apple/coreml/scripts/install_requirements.sh -say "Installing MPS Backend Requirements" - -./backends/apple/mps/install_requirements.sh - say "Exporting Models" python3 -m examples.portable.scripts.export --model_name="$MODEL_NAME" --segment_alignment=0x4000 diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index 38c45dc3fb7..aa74f3a5447 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -49,14 +49,24 @@ prepare_artifacts_upload() { } build_cmake_executor_runner() { + local backend_string_select="${1:-}" echo "Building executor_runner" rm -rf ${CMAKE_OUTPUT_DIR} - cmake -DCMAKE_BUILD_TYPE=Debug \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ - -B${CMAKE_OUTPUT_DIR} . 
- - cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug + mkdir ${CMAKE_OUTPUT_DIR} + if [[ "$backend_string_select" == "XNNPACK" ]]; then + echo "Backend $backend_string_select selected" + (cd ${CMAKE_OUTPUT_DIR} \ + && cmake -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..) + cmake --build ${CMAKE_OUTPUT_DIR} -j4 + else + cmake -DCMAKE_BUILD_TYPE=Debug \ + -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ + -B${CMAKE_OUTPUT_DIR} . + cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug + fi } run_portable_executor_runner() { @@ -111,19 +121,6 @@ test_model() { run_portable_executor_runner } -build_cmake_xnn_executor_runner() { - echo "Building xnn_executor_runner" - - (rm -rf ${CMAKE_OUTPUT_DIR} \ - && mkdir ${CMAKE_OUTPUT_DIR} \ - && cd ${CMAKE_OUTPUT_DIR} \ - && retry cmake -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..) - - cmake --build ${CMAKE_OUTPUT_DIR} -j4 -} - test_model_with_xnnpack() { WITH_QUANTIZATION=$1 WITH_DELEGATION=$2 @@ -148,12 +145,11 @@ test_model_with_xnnpack() { # Run test model if [[ "${BUILD_TOOL}" == "buck2" ]]; then + # TODO eventually buck should also use consolidated executor runners buck2 run //examples/xnnpack:xnn_executor_runner -- --model_path "${OUTPUT_MODEL_PATH}" elif [[ "${BUILD_TOOL}" == "cmake" ]]; then - if [[ ! -f ${CMAKE_OUTPUT_DIR}/backends/xnnpack/xnn_executor_runner ]]; then - build_cmake_xnn_executor_runner - fi - ./${CMAKE_OUTPUT_DIR}/backends/xnnpack/xnn_executor_runner --model_path "${OUTPUT_MODEL_PATH}" + build_cmake_executor_runner "XNNPACK" + ./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "${OUTPUT_MODEL_PATH}" else echo "Invalid build tool ${BUILD_TOOL}. 
Only buck2 and cmake are supported atm" exit 1 diff --git a/.github/scripts/label_utils.py b/.github/scripts/label_utils.py index 53daf222250..609316cfe2b 100644 --- a/.github/scripts/label_utils.py +++ b/.github/scripts/label_utils.py @@ -22,9 +22,7 @@ LABEL_ERR_MSG_TITLE = "This PR needs a `release notes:` label" LABEL_ERR_MSG = f"""# {LABEL_ERR_MSG_TITLE} -If your change should be included in the release notes (i.e. would users of this library care about this change?), please use a label starting with `release notes:`. - -If not, please add the `release notes: none` label. +If your change should be included in the release notes (i.e. would users of this library care about this change?), please use a label starting with `release notes:`. This helps us keep track and include your important work in the next release notes. To add a label, you can comment to pytorchbot, for example `@pytorchbot label "release notes: none"` diff --git a/.github/scripts/trymerge.py b/.github/scripts/trymerge.py index 5a45089508a..124fc4ecbad 100755 --- a/.github/scripts/trymerge.py +++ b/.github/scripts/trymerge.py @@ -59,12 +59,7 @@ patterns_to_regex, retries_decorator, ) -from label_utils import ( - gh_add_labels, - gh_remove_label, - has_required_labels, - LABEL_ERR_MSG, -) +from label_utils import gh_add_labels, gh_remove_label from trymerge_explainer import get_revert_message, TryMergeExplainer # labels @@ -2116,9 +2111,6 @@ def merge( # Check for approvals find_matching_merge_rule(pr, repo, skip_mandatory_checks=True) - if not has_required_labels(pr): - raise RuntimeError(LABEL_ERR_MSG.lstrip(" #")) - if ignore_current: checks = pr.get_checkrun_conclusions() _, failing, _ = categorize_checks( diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index ed8e21a8fb4..846dc576f43 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -188,11 +188,6 @@ jobs: backends/apple/coreml/scripts/install_requirements.sh fi - if [[ ${{ 
matrix.config }} == *"mps"* ]]; then - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - backends/apple/mps/install_requirements.sh - fi - # Install requirements for export_llama PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh @@ -379,10 +374,6 @@ jobs: # Install CoreML Backend Requirements PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ backends/apple/coreml/scripts/install_requirements.sh - - # Install MPS Backend Requirements - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - backends/apple/mps/install_requirements.sh echo "::endgroup::" echo "::group::Build ExecuTorch iOS frameworks" diff --git a/.github/workflows/apple.yml b/.github/workflows/apple.yml index 9c164ff5085..dcd4a0ab2a3 100644 --- a/.github/workflows/apple.yml +++ b/.github/workflows/apple.yml @@ -154,7 +154,6 @@ jobs: "backend_xnnpack" "kernels_custom" "kernels_optimized" - "kernels_portable" "kernels_quantized" "threadpool" ) @@ -169,10 +168,6 @@ jobs: PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ backends/apple/coreml/scripts/install_requirements.sh - # Install MPS Backend Requirements - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - backends/apple/mps/install_requirements.sh - # Build iOS Frameworks PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output scripts/build_apple_frameworks.sh @@ -307,10 +302,6 @@ jobs: # Install CoreML Backend Requirements PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ backends/apple/coreml/scripts/install_requirements.sh - - # Install MPS Backend Requirements - PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - backends/apple/mps/install_requirements.sh echo "::endgroup::" echo "::group::Build ExecuTorch iOS frameworks" diff --git a/.github/workflows/check-labels.yml b/.github/workflows/check-labels.yml index 19c70c820a8..65da3052155 100644 --- a/.github/workflows/check-labels.yml +++ b/.github/workflows/check-labels.yml @@ -51,4 
+51,4 @@ jobs: PR_NUM: ${{ github.event.number || github.event.inputs.pr_number }} run: | set -ex - python3 .github/scripts/check_labels.py --exit-non-zero "${PR_NUM}" + python3 .github/scripts/check_labels.py "${PR_NUM}" diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index b4ce196e8ad..cab558c9b56 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -305,7 +305,7 @@ jobs: # Install requirements ${CONDA_RUN} sh install_requirements.sh ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh - ${CONDA_RUN} python install_executorch.py --pybind coreml + ${CONDA_RUN} python install_executorch.py ${CONDA_RUN} sh examples/models/llama/install_requirements.sh # Test ANE llama @@ -414,11 +414,7 @@ jobs: # Setup executorch PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake - if [[ "${MODE}" == "mps" ]]; then - # Install mps delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh - echo "Finishing installing mps." - elif [[ "${MODE}" == "coreml" ]]; then + if [[ "${MODE}" == "coreml" ]]; then # Install coreml delegate PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh echo "Finishing installing coreml." @@ -504,8 +500,6 @@ jobs: PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh echo "Finishing installing coreml." - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh - echo "Finishing installing mps." # Build and test coreml model MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65a1eb50a77..10d4fcd95ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,13 +39,16 @@ # ~~~ # cmake-format -i CMakeLists.txt # ~~~ -# It should also be cmake-lint clean. 
+# It should also be checked with a linter via +# ~~~ +# cmake-lint CMakeLists.txt +# ~~~ # cmake_minimum_required(VERSION 3.24) project(executorch) -# MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION -------------------------------------------------- +# MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake) include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake) @@ -82,24 +85,25 @@ include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake) # Print all the configs that were called with announce_configured_options. print_configured_options() -# MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION ---------------------------------------------------- +# MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# Setup RPATH. -# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling +# Setup RPATH. See +# https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling # Use separate rpaths during build and install phases set(CMAKE_SKIP_BUILD_RPATH OFF) # Don't use the install-rpath during the build phase set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) # Automatically add all linked folders that are NOT in the build directory to # the rpath (per library?) -# TODO: Doesn't work for us right now because we are not installing .so's into the -# correct locations. For example we have libcustom_ops_aot_lib.so depending on -# _portable_lib.so, which was eventually put under /executorch/extension/pybindings/ -# but this rpath is not automatically added because at build time it seems `portable_lib` -# is being built under the same directory, so no extra rpath is being added. To -# properly fix this we need to install `portable_lib` into the correct path. +# TODO: Doesn't work for us right now because we are +# not installing .so's into the correct locations. 
For example we have +# libcustom_ops_aot_lib.so depending on _portable_lib.so, which was eventually +# put under /executorch/extension/pybindings/ but this rpath is +# not automatically added because at build time it seems `portable_lib` is being +# built under the same directory, so no extra rpath is being added. To properly +# fix this we need to install `portable_lib` into the correct path. set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) # ------------------------------ OPTIONS ------------------------------------- # WARNING: Please don't add example specific options in this CMakeLists.txt. @@ -177,7 +181,7 @@ endif() if(NOT DEFINED FXDIV_SOURCE_DIR) set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG - ${CMAKE_POSITION_INDEPENDENT_CODE} + ${CMAKE_POSITION_INDEPENDENT_CODE} ) set(FXDIV_SOURCE_DIR "backends/xnnpack/third-party/FXdiv") add_subdirectory("${FXDIV_SOURCE_DIR}") @@ -276,7 +280,10 @@ if(NOT "${_repo_dir_name}" STREQUAL "executorch") "fix for this restriction." ) endif() -set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type/c10) +set(_common_include_directories + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type/c10 +) # # The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}. @@ -310,9 +317,9 @@ endif() # Detect if an Android toolchain is set. 
if(CMAKE_TOOLCHAIN_FILE MATCHES ".*android\.toolchain\.cmake$") set(CMAKE_TOOLCHAIN_ANDROID ON) -if(NOT ANDROID_PLATFORM) - set(ANDROID_PLATFORM android-30) -endif() + if(NOT ANDROID_PLATFORM) + set(ANDROID_PLATFORM android-30) + endif() else() set(CMAKE_TOOLCHAIN_ANDROID OFF) endif() @@ -334,7 +341,6 @@ if(EXECUTORCH_USE_CPP_CODE_COVERAGE) endif() endif() - # # program_schema: Generated .h files from schema/*.fbs inputs # @@ -376,7 +382,9 @@ endif() target_include_directories( executorch_core PUBLIC ${_common_include_directories} ) -target_compile_definitions(executorch_core PUBLIC C10_USING_CUSTOM_GENERATED_MACROS) +target_compile_definitions( + executorch_core PUBLIC C10_USING_CUSTOM_GENERATED_MACROS +) target_compile_options(executorch_core PUBLIC ${_common_compile_options}) if(MAX_KERNEL_NUM) target_compile_definitions( @@ -386,9 +394,7 @@ endif() if(EXECUTORCH_BUILD_PYBIND AND APPLE) # shared version - add_library( - executorch_core_shared SHARED ${_executorch_core__srcs} - ) + add_library(executorch_core_shared SHARED ${_executorch_core__srcs}) target_link_libraries(executorch_core_shared PRIVATE program_schema) if(DL_LIBRARY_EXISTS) # For dladdr() @@ -397,7 +403,9 @@ if(EXECUTORCH_BUILD_PYBIND AND APPLE) target_include_directories( executorch_core_shared PUBLIC ${_common_include_directories} ) - target_compile_definitions(executorch_core_shared PUBLIC C10_USING_CUSTOM_GENERATED_MACROS) + target_compile_definitions( + executorch_core_shared PUBLIC C10_USING_CUSTOM_GENERATED_MACROS + ) target_compile_options( executorch_core_shared PUBLIC ${_common_compile_options} ) @@ -430,9 +438,8 @@ target_link_options_shared_lib(executorch) # operators necessary for the models that will run. # if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED) - # find pytorch lib here to make it available to all - # sub-directories. Find it before including portable so that - # optimized_portable_kernels can use it. + # find pytorch lib here to make it available to all sub-directories. 
Find it + # before including portable so that optimized_portable_kernels can use it. find_package_torch_headers() endif() @@ -458,19 +465,50 @@ endif() # Install `executorch` library as well as `executorch-config.cmake` under # ${CMAKE_INSTALL_PREFIX}/ -install(DIRECTORY runtime/core/ DESTINATION include/executorch/runtime/core FILES_MATCHING PATTERN "*.h") -install(DIRECTORY runtime/kernel/ DESTINATION include/executorch/runtime/kernel FILES_MATCHING PATTERN "*.h") -install(DIRECTORY runtime/platform/ DESTINATION include/executorch/runtime/platform FILES_MATCHING PATTERN "*.h") -install(DIRECTORY extension/kernel_util/ DESTINATION include/executorch/extension/kernel_util FILES_MATCHING PATTERN "*.h") -install(DIRECTORY extension/tensor/ DESTINATION include/executorch/extension/tensor FILES_MATCHING PATTERN "*.h") -install(DIRECTORY extension/threadpool/ DESTINATION include/executorch/extension/threadpool FILES_MATCHING PATTERN "*.h") +install( + DIRECTORY runtime/core/ + DESTINATION include/executorch/runtime/core + FILES_MATCHING + PATTERN "*.h" +) +install( + DIRECTORY runtime/kernel/ + DESTINATION include/executorch/runtime/kernel + FILES_MATCHING + PATTERN "*.h" +) +install( + DIRECTORY runtime/platform/ + DESTINATION include/executorch/runtime/platform + FILES_MATCHING + PATTERN "*.h" +) +install( + DIRECTORY extension/kernel_util/ + DESTINATION include/executorch/extension/kernel_util + FILES_MATCHING + PATTERN "*.h" +) +install( + DIRECTORY extension/tensor/ + DESTINATION include/executorch/extension/tensor + FILES_MATCHING + PATTERN "*.h" +) +install( + DIRECTORY extension/threadpool/ + DESTINATION include/executorch/extension/threadpool + FILES_MATCHING + PATTERN "*.h" +) install( TARGETS executorch executorch_core - DESTINATION lib INCLUDES DESTINATION ${_common_include_directories} ) -install(FILES tools/cmake/executorch-config.cmake DESTINATION lib/cmake/ExecuTorch) +install(FILES tools/cmake/executorch-config.cmake + DESTINATION lib/cmake/ExecuTorch 
+) if(EXECUTORCH_BUILD_ARM_BAREMETAL) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm) @@ -608,17 +646,14 @@ if(EXECUTORCH_BUILD_PYBIND) endif() if(EXECUTORCH_BUILD_XNNPACK) - # need to explicitly specify XNNPACK and microkernels-prod - # here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu + # need to explicitly specify XNNPACK and microkernels-prod here otherwise + # uses XNNPACK and microkernel-prod symbols from libtorch_cpu list(APPEND _dep_libs xnnpack_backend XNNPACK microkernels-prod) endif() # compile options for pybind - set(_pybind_compile_options - -Wno-deprecated-declarations - -fPIC - -frtti - -fexceptions + set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti + -fexceptions ) # util lib diff --git a/CMakePresets.json b/CMakePresets.json index 315084f59ae..9ea91fab343 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -15,7 +15,7 @@ "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake", "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos.cmake", "PLATFORM": "MAC_ARM64", - "DEPLOYMENT_TARGET": "10.15" + "DEPLOYMENT_TARGET": "12.0" }, "condition": { "lhs": "${hostSystemName}", @@ -77,7 +77,7 @@ "inherits": ["common"], "cacheVariables": { "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/pybind.cmake", - "CMAKE_OSX_DEPLOYMENT_TARGET": "10.15" + "CMAKE_OSX_DEPLOYMENT_TARGET": "12.0" }, "condition": { "type": "inList", @@ -93,7 +93,7 @@ ], "cacheVariables": { "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/llm.cmake", - "CMAKE_OSX_DEPLOYMENT_TARGET": "10.15" + "CMAKE_OSX_DEPLOYMENT_TARGET": "12.0" }, "condition": { "type": "inList", diff --git a/Package.swift b/Package.swift index 6ee8debc413..43760822c19 100644 --- a/Package.swift +++ b/Package.swift @@ -22,7 +22,27 @@ import PackageDescription let debug_suffix = "_debug" let dependencies_suffix = "_with_dependencies" -let products = [ +func deliverables(_ dict: [String: 
[String: Any]]) -> [String: [String: Any]] { + dict + .reduce(into: [String: [String: Any]]()) { result, pair in + let (key, value) = pair + result[key] = value + result[key + debug_suffix] = value + } + .reduce(into: [String: [String: Any]]()) { result, pair in + let (key, value) = pair + var newValue = value + if key.hasSuffix(debug_suffix) { + for (k, v) in value where k.hasSuffix(debug_suffix) { + let trimmed = String(k.dropLast(debug_suffix.count)) + newValue[trimmed] = v + } + } + result[key] = newValue.filter { !$0.key.hasSuffix(debug_suffix) } + } +} + +let products = deliverables([ "backend_coreml": [ "frameworks": [ "Accelerate", @@ -58,50 +78,52 @@ let products = [ "threadpool", ], ], - "kernels_portable": [:], "kernels_quantized": [:], -].reduce(into: [String: [String: Any]]()) { - $0[$1.key] = $1.value - $0[$1.key + debug_suffix] = $1.value +]) + +let targets = deliverables([ + "threadpool": [:], +]) + +let packageProducts: [Product] = products.keys.map { key -> Product in + .library(name: key, targets: ["\(key)\(dependencies_suffix)"]) +}.sorted { $0.name < $1.name } + +var packageTargets: [Target] = [] + +for (key, value) in targets { + packageTargets.append(.binaryTarget( + name: key, + path: "cmake-out/\(key).xcframework" + )) } -let targets = [ - "threadpool", -].flatMap { [$0, $0 + debug_suffix] } +for (key, value) in products { + packageTargets.append(.binaryTarget( + name: key, + path: "cmake-out/\(key).xcframework" + )) + let target: Target = .target( + name: "\(key)\(dependencies_suffix)", + dependencies: ([key] + (value["targets"] as? [String] ?? []).map { + key.hasSuffix(debug_suffix) ? $0 + debug_suffix : $0 + }).map { .target(name: $0) }, + path: ".Package.swift/\(key)", + linkerSettings: + (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } + + (value["libraries"] as? [String] ?? 
[]).map { .linkedLibrary($0) } + ) + packageTargets.append(target) +} let package = Package( name: "executorch", platforms: [ .iOS(.v17), - .macOS(.v10_15), + .macOS(.v12), ], - products: products.keys.map { key in - .library(name: key, targets: ["\(key)\(dependencies_suffix)"]) - }.sorted { $0.name < $1.name }, - targets: targets.map { key in - .binaryTarget( - name: key, - path: "cmake-out/\(key).xcframework" - ) - } + products.flatMap { key, value -> [Target] in - [ - .binaryTarget( - name: key, - path: "cmake-out/\(key).xcframework" - ), - .target( - name: "\(key)\(dependencies_suffix)", - dependencies:([key] + - (value["targets"] as? [String] ?? []).map { - target in key.hasSuffix(debug_suffix) ? target + debug_suffix : target - }).map { .target(name: $0) }, - path: ".Package.swift/\(key)", - linkerSettings: - (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } + - (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) } - ), - ] - } + [ + products: packageProducts, + targets: packageTargets + [ .testTarget( name: "tests", dependencies: [ diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm index 63bc60695ce..8f36087dcc6 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm @@ -27,7 +27,7 @@ - (instancetype)initWithModel:(ETCoreMLModel *)model { eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable __unused)eventLogger error:(NSError * __autoreleasing *)error { if (self.ignoreOutputBackings) { - if (@available(macOS 11.0, iOS 16.0, tvOS 16.0, watchOS 9.0, *)) { + if (@available(iOS 16.0, tvOS 16.0, watchOS 9.0, *)) { predictionOptions.outputBackings = @{}; } } diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm index 
a64d977bb26..f4cfd2146ac 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm @@ -92,7 +92,7 @@ BOOL is_backed_by_same_buffer(MLMultiArray *array1, MLMultiArray *array2) { NSOrderedSet *output_names, NSError * __autoreleasing *error) { MLPredictionOptions *options = [MLPredictionOptions new]; - if (@available(macOS 11.0, iOS 16.0, tvOS 16.0, watchOS 9.0, *)) { + if (@available(iOS 16.0, tvOS 16.0, watchOS 9.0, *)) { NSMutableDictionary *output_backings = [NSMutableDictionary dictionary]; NSEnumerator *enumerator = [output_names objectEnumerator]; for (MLMultiArray *output in outputs) { @@ -687,7 +687,7 @@ - (void)addPrewarmedAsset:(ETCoreMLAsset *)asset { eventLogger:eventLogger error:&localError]; // Try without output backings. - if (@available(macOS 11.0, iOS 16.0, tvOS 16.0, watchOS 9.0, *)) { + if (@available(iOS 16.0, tvOS 16.0, watchOS 9.0, *)) { if (!modelOutputs && predictionOptions.outputBackings.count > 0) { executor.ignoreOutputBackings = YES; localError = nil; diff --git a/backends/apple/mps/install_requirements.sh b/backends/apple/mps/install_requirements.sh deleted file mode 100755 index 1bc663d9d6d..00000000000 --- a/backends/apple/mps/install_requirements.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2023 Apple Inc. All rights reserved. -# Provided subject to the LICENSE file in the top level directory. -# - -# Install required python dependencies for using the MPS Backend -pip install --force-reinstall ninja diff --git a/backends/apple/mps/setup.md b/backends/apple/mps/setup.md index b35983514db..0ecb4151e61 100644 --- a/backends/apple/mps/setup.md +++ b/backends/apple/mps/setup.md @@ -42,12 +42,6 @@ In order to be able to successfully build and run a model using the MPS backend ***Step 1.*** Please finish tutorial [Setting up ExecuTorch](https://pytorch.org/executorch/main/getting-started-setup). 
-***Step 2.*** Install dependencies needed to lower MPS delegate: - - ```bash - ./backends/apple/mps/install_requirements.sh - ``` - ## Build ### AOT (Ahead-of-time) Components @@ -97,7 +91,7 @@ I 00:00:00.122615 executorch:mps_executor_runner.mm:501] Model verified successf ### [Optional] Run the generated model directly using pybind 1. Make sure `pybind` MPS support was installed: ```bash -./install_executorch.sh --pybind mps +CMAKE_ARGS="-DEXECUTORCH_BUILD_MPS=ON" ./install_executorch.sh ``` 2. Run the `mps_example` script to trace the model and run it directly from python: ```bash diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py index 22bf7f4c013..f207d85ebd7 100644 --- a/backends/arm/_passes/__init__.py +++ b/backends/arm/_passes/__init__.py @@ -23,6 +23,7 @@ from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa from .decompose_div_pass import DecomposeDivPass # noqa from .decompose_gelu_pass import DecomposeGeluPass # noqa +from .decompose_groupnorm_pass import DecomposeGroupNormPass # noqa from .decompose_layernorm_pass import DecomposeLayerNormPass # noqa from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass # noqa diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py index d2ab9dcb9ef..02a4edd398b 100644 --- a/backends/arm/_passes/arm_pass_manager.py +++ b/backends/arm/_passes/arm_pass_manager.py @@ -27,6 +27,7 @@ DecomposeCosineSimilarityPass, DecomposeDivPass, DecomposeGeluPass, + DecomposeGroupNormPass, DecomposeLayerNormPass, DecomposeLeakyReLUPass, DecomposeLinearPass, @@ -141,6 +142,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul self.add_pass(ConvertMmToBmmPass()) self.add_pass(DecomposeLinearPass()) self.add_pass(DecomposeLeakyReLUPass()) + self.add_pass(DecomposeGroupNormPass()) self.add_pass(DecomposeLayerNormPass()) 
self.add_pass(DecomposeVarPass()) self.add_pass( @@ -208,6 +210,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule): self.add_pass(DecomposeScaledDotProductAttention()) self.add_pass(ReplaceScalarWithTensorArgPassTOSABI()) self.add_pass(ScalarsToAttributePass()) + self.add_pass(DecomposeGroupNormPass()) self.add_pass(DecomposeLayerNormPass()) self.add_pass(DecomposeVarPass()) self.add_pass(DecomposeMeanDimPass(graph_module, self.tosa_spec)) diff --git a/backends/arm/_passes/decompose_groupnorm_pass.py b/backends/arm/_passes/decompose_groupnorm_pass.py new file mode 100644 index 00000000000..c6cb1b05e40 --- /dev/null +++ b/backends/arm/_passes/decompose_groupnorm_pass.py @@ -0,0 +1,208 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-unsafe + +import operator + +import torch +from executorch.backends.arm._passes import ArmPass +from executorch.backends.arm._passes.arm_pass_utils import create_node +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import PassResult + + +def get_group_norm_decomposition(op) -> tuple: + if op == exir_ops.edge.aten.native_group_norm.default: + return ( + exir_ops.edge.aten.mean.dim, + exir_ops.edge.aten.sub.Tensor, + exir_ops.edge.aten.var.correction, + exir_ops.edge.aten.full.default, + exir_ops.edge.aten.add.Tensor, + exir_ops.edge.aten.rsqrt.default, + exir_ops.edge.aten.mul.Tensor, + exir_ops.edge.aten.view_copy.default, + ) + if op == torch.ops.aten.group_norm.default: + return ( + torch.ops.aten.mean.dim, + torch.ops.aten.sub.Tensor, + torch.ops.aten.var.correction, + torch.ops.aten.full.default, + torch.ops.aten.add.Tensor, + torch.ops.aten.rsqrt.default, + torch.ops.aten.mul.Tensor, + torch.ops.aten.view_copy.default, + ) + raise RuntimeError(f"Can't get group_norm composition for op {op}") + + +class 
DecomposeGroupNormPass(ArmPass): + """ + groupnorm is defined as: ((x - E[x]) / sqrt(Var[x] + eps)) * weights + bias + Decompose groupnorm(x, weight, bias, N, C, HxW, group, eps) to a sequence of: + mean = op_mean(x, dims) # E[x] + var = op_var(x, dims) # Var[x] + numerator = op_sub(x, mean) # (x - E[x]) + add = op_add(var, eps) # Var[x] + eps + rsqrt = op_rsqrt(add) # 1 / sqrt(Var[x] + eps) + mul = op_mul(numerator, rsqrt) # ((x - E[x]) / sqrt(Var[x] + eps)) + weigths = op_mul(mul, weigths) # ((x - E[x]) / sqrt(Var[x] + eps)) * weigths + bias = op_add(weigths, bias) # ((x - E[x]) / sqrt(Var[x] + eps)) * weigths + bias + where x can viewed with shape [N, group, C//group, HxW] dims=[C//group, HxW] + + Source: https://pytorch.org/docs/stable/generated/torch.nn.GroupNorm.html + """ + + def call(self, graph_module: torch.fx.GraphModule): + modified = False + for node in graph_module.graph.nodes: + if node.op != "call_function" or node.target not in ( + exir_ops.edge.aten.native_group_norm.default, + torch.ops.aten.group_norm.default, + ): + continue + + # epsilon default value + eps = torch.finfo().eps + weights = None + bias = None + args = node.args + meta = node.meta + if isinstance(meta["val"], tuple): + shape = meta["val"][0].size() + dtype = meta["val"][0].dtype + else: + shape = meta["val"].size() + dtype = meta["val"].dtype + match len(args): + # MI profile always provides all the args: x, weight, bias, N, C, HxW, group, eps + case 8: + x, weights, bias, N, C, HxW, group, eps = args + # BI profile: affine=[True|False], eps!=1e-5 + case 5: + x, group, weights, bias, eps = args + # BI profile: affine=True, eps=1e-5 + case 4: + x, group, weights, bias = args + # BI profile: affine=False, eps=1e=5 + case 2: + x, group = args + # Unsupported args + case _: + raise ValueError( + f"Unsupported group_norm argument pattern with {len(args)} args" + ) + N = shape[0] + C = shape[1] + HxW = 1 + for dim in shape[2:]: + HxW *= dim + channels_per_group = C // group + 
grouped_shape = torch.Size([N, group, channels_per_group, HxW]) + dims = [2, 3] + epsilon_reshaped_shape = torch.Size([1] * len(grouped_shape)) + weights_reshaped_shape = torch.Size([1, group, channels_per_group, 1]) + ( + mean_op, + sub_op, + var_op, + full_op, + add_op, + rsqrt_op, + mul_op, + view_op, + ) = get_group_norm_decomposition(node.target) + with graph_module.graph.inserting_before(node): + keepdim = True + x_reshaped = create_node( + graph_module.graph, + view_op, + args=(x, grouped_shape), + from_node=node, + ) + mean = create_node( + graph_module.graph, mean_op, args=(x_reshaped, dims, keepdim) + ) + sub = create_node(graph_module.graph, sub_op, args=(x_reshaped, mean)) + var = create_node( + graph_module.graph, + var_op, + args=(x_reshaped, dims), + kwargs={"correction": 0, "keepdim": keepdim}, + from_node=node, + ) + full = create_node( + graph_module.graph, + full_op, + args=(epsilon_reshaped_shape, eps), + kwargs={"dtype": dtype}, + from_node=node, + ) + add0 = create_node( + graph_module.graph, add_op, args=(var, full), from_node=node + ) + rsqrt = create_node( + graph_module.graph, rsqrt_op, args=(add0,), from_node=node + ) + mul0 = create_node( + graph_module.graph, mul_op, args=(sub, rsqrt), from_node=node + ) + if weights is not None: + weights_reshaped = create_node( + graph_module.graph, + view_op, + args=(weights, weights_reshaped_shape), + from_node=node, + ) + mul1 = create_node( + graph_module.graph, + mul_op, + args=( + mul0, + weights_reshaped, + ), + from_node=node, + ) + else: + mul1 = mul0 + if bias is not None: + bias_reshaped_shape = weights_reshaped_shape + bias_reshaped = create_node( + graph_module.graph, + view_op, + args=(bias, bias_reshaped_shape), + from_node=node, + ) + output = create_node( + graph_module.graph, + add_op, + args=(mul1, bias_reshaped), + from_node=node, + ) + else: + output = mul1 + + output_reshaped = create_node( + graph_module.graph, + view_op, + args=(output, shape), + from_node=node, + ) + + users = 
[user for user in node.users if node != user] + node.replace_all_uses_with(output_reshaped) + for user in users: + if user.target == operator.getitem: + user.replace_all_uses_with(output_reshaped) + graph_module.graph.erase_node(node) + graph_module.graph.eliminate_dead_code() + modified = True + if modified: + graph_module.recompile() + graph_module = super().call(graph_module).graph_module + + return PassResult(graph_module, modified) diff --git a/backends/arm/_passes/decompose_layernorm_pass.py b/backends/arm/_passes/decompose_layernorm_pass.py index a92434faa7d..e6cbdfb91a0 100644 --- a/backends/arm/_passes/decompose_layernorm_pass.py +++ b/backends/arm/_passes/decompose_layernorm_pass.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -47,11 +46,12 @@ class DecomposeLayerNormPass(ArmPass): Decompose layernorm(x, normalized_shape, weights, bias, eps) to a sequence of: mean = op_mean(x, dims) # E[x] var = op_var(x, dims) # Var[x] - denominator = op_sub(x, mean) # (x - E[x]) + numerator = op_sub(x, mean) # (x - E[x]) add = op_add(var, eps) # Var[x] + eps rsqrt = op_rsqrt(add) # 1 / sqrt(Var[x] + eps) - mul = op_mul(denominator, rsqrt) # ((x - E[x]) / sqrt(Var[x] + eps)) * weigths - bias = op_add(mul, bias) # ((x - E[x]) / sqrt(Var[x] + eps)) * weigths + bias + mul = op_mul(numerator, rsqrt) # ((x - E[x]) / sqrt(Var[x] + eps)) + weigths = op_mul(mul, weigths) # ((x - E[x]) / sqrt(Var[x] + eps)) * weigths + bias = op_add(weigths, bias) # ((x - E[x]) / sqrt(Var[x] + eps)) * weigths + bias Source: https://pytorch.org/docs/stable/generated/torch.nn.LayerNorm.html """ diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py index 766e98688d9..36ae77d26a3 100644 --- 
a/backends/arm/operator_support/tosa_supported_operators.py +++ b/backends/arm/operator_support/tosa_supported_operators.py @@ -198,6 +198,7 @@ def is_node_supported( exir_ops.edge.aten.div.Scalar, exir_ops.edge.aten._native_batch_norm_legit_no_training.default, exir_ops.edge.aten.native_layer_norm.default, + exir_ops.edge.aten.native_group_norm.default, exir_ops.edge.aten.sigmoid.default, exir_ops.edge.aten.mean.dim, exir_ops.edge.aten.mm.default, @@ -264,6 +265,7 @@ def is_node_supported( exir_ops.edge.aten.div.Tensor: None, exir_ops.edge.aten._native_batch_norm_legit_no_training.default: "BatchNorm2D with track_running_stats==True not immediately following a convolution is not supported for quantized TOSA backends.", exir_ops.edge.aten.native_layer_norm.default: None, + exir_ops.edge.aten.native_group_norm.default: None, exir_ops.edge.aten._softmax.default: None, exir_ops.edge.aten._log_softmax.default: None, exir_ops.edge.aten.var.correction: None, diff --git a/backends/arm/operators/op_conv2d.py b/backends/arm/operators/op_conv2d.py index fdbb20fbe18..a566b0fbfa7 100644 --- a/backends/arm/operators/op_conv2d.py +++ b/backends/arm/operators/op_conv2d.py @@ -6,7 +6,6 @@ # pyre-unsafe from typing import Any, List -import numpy as np import torch from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import ( @@ -333,21 +332,22 @@ def define_node( weight.dtype, ) shape = tosa_graph.addConst( - np.array(weight_post_shape).shape, + [len(weight_post_shape)], ts.DType.SHAPE, - np.array(weight_post_shape), + weight_post_shape, name=weight_reshaped.name + "_shape", ) - attr = ts.TosaSerializerAttribute() - attr.ReshapeAttribute() + reshape_attr = ts.TosaSerializerAttribute() + reshape_attr.ReshapeAttribute() tosa_graph.addOperator( ts.TosaOp.Op().RESHAPE, [weight.name, shape.name], [weight_reshaped.name], - attr, + reshape_attr, ) + attr = ts.TosaSerializerAttribute() tosa_op = ts.TosaOp.Op().DEPTHWISE_CONV2D weight_name = weight_reshaped.name diff 
--git a/backends/arm/operators/op_view.py b/backends/arm/operators/op_view.py index e7a062bbf22..d8ac85ec63a 100644 --- a/backends/arm/operators/op_view.py +++ b/backends/arm/operators/op_view.py @@ -74,14 +74,14 @@ def define_node( tosa_graph = cast(ts.TosaSerializer, tosa_graph) if len(output.shape) != 0: - shape_len = len(output.shape) + shape_len = [len(output.shape)] shape_data = list(tosa_shape(output.shape, output.dim_order)) else: - shape_len = 1 - shape_data = [0] + shape_len = [] + shape_data = [] shape = tosa_graph.addConst( - [shape_len], + shape_len, ts.DType.SHAPE, shape_data, name=node.name + "_shape", diff --git a/backends/arm/scripts/parse_test_names.py b/backends/arm/scripts/parse_test_names.py index 62ff93ebc91..d4342e428b6 100644 --- a/backends/arm/scripts/parse_test_names.py +++ b/backends/arm/scripts/parse_test_names.py @@ -16,6 +16,7 @@ "adaptive_avg_pool2d.default", "bitwise_right_shift.Tensor", "bitwise_left_shift.Tensor", + "native_group_norm.default", "_native_batch_norm_legit_no_training.default", "_native_batch_norm_legit.no_stats", ] diff --git a/backends/arm/scripts/pre-push b/backends/arm/scripts/pre-push index b9a5b567a42..a4e877fdcfc 100755 --- a/backends/arm/scripts/pre-push +++ b/backends/arm/scripts/pre-push @@ -4,9 +4,9 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -# Calling this script with any argument is equal to launching it in +# Calling this script with one argument is equal to launching it in # non-interactive mode. "$#" gives the number of positional arguments. 
-[ "$#" -eq 0 ] && is_script_interactive=1 || is_script_interactive=0 +[ "$#" -eq 1 ] && is_script_interactive=1 || is_script_interactive=0 if [ $is_script_interactive -eq 1 ]; then RESET='\e[0m' diff --git a/backends/arm/test/ops/test_group_norm.py b/backends/arm/test/ops/test_group_norm.py new file mode 100644 index 00000000000..9c5517d9dae --- /dev/null +++ b/backends/arm/test/ops/test_group_norm.py @@ -0,0 +1,145 @@ +# Copyright 2025 Arm Limited and/or its affiliates. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import torch +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + + +class GroupNorm(torch.nn.Module): + + def __init__( + self, + num_groups: int, + num_channels: int, + eps: float = 1e-5, + affine: bool = True, + ): + super().__init__() + self.group_norm = torch.nn.GroupNorm( + num_groups, + num_channels, + eps=eps, + affine=affine, + ) + + def forward( + self, + x: torch.Tensor, + ): + return self.group_norm(x) + + +input_t = tuple[torch.Tensor] +test_data_suite = { + "rand_4_6_groups_1": ((torch.rand(4, 6),), GroupNorm(1, 6)), + "rand_4_6_groups_2": ((torch.rand(4, 6),), GroupNorm(2, 6)), + "rand_4_6_groups_6": ((torch.rand(4, 6),), GroupNorm(6, 6)), + "rand_4_6_8_groups_2_eps_no_affine": ( + (torch.rand(4, 6, 8),), + GroupNorm(2, 6, eps=1e-3, affine=False), + ), + "randn_1_12_8_6_groups_6_eps": ( + (torch.randn(1, 12, 8, 6),), + GroupNorm(6, 12, eps=1e-2), + ), + "randn_1_12_8_6_groups_12": ((torch.randn(1, 12, 8, 6),), GroupNorm(12, 12)), + "rand_6_8_10_12_groups_1": ((torch.rand(6, 8, 10, 12),), GroupNorm(1, 8)), + "rand_6_8_10_12_groups_4_no_affine": ( + (torch.rand(6, 8, 10, 12),), + GroupNorm(4, 8, affine=False), + ), + "rand_6_8_10_12_groups_8": ((torch.rand(6, 8, 10, 12),), GroupNorm(8, 8)), +} + + 
+@common.parametrize("test_data", test_data_suite) +def test_native_group_norm_tosa_MI(test_data): + aten_op = "torch.ops.aten.group_norm.default" + exir_op = "executorch_exir_dialects_edge__ops_aten_native_group_norm_default" + pipeline = TosaPipelineMI[input_t]( + test_data[1], + test_data[0], + aten_op=aten_op, + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize( + "test_data", + test_data_suite, + xfails={ + "randn_1_12_8_6_groups_12": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_1": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_4_no_affine": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_8": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + }, + strict=False, +) +def test_native_group_norm_tosa_BI(test_data): + aten_op = "torch.ops.aten.sub.Tensor" # 'sub' op arbitrarily chosen to confirm groupnorm was decomposed + exir_op = "executorch_exir_dialects_edge__ops_aten_native_group_norm_default" + pipeline = TosaPipelineBI[input_t]( + test_data[1], + test_data[0], + aten_op=aten_op, + exir_op=exir_op, + atol=0.1, # TODO: "MLETORCH-925: Fix numerical issue for aten.native_group_norm" + ) + pipeline.run() + + +@common.parametrize( + "test_data", + test_data_suite, + xfails={ + "randn_1_12_8_6_groups_12": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_1": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_4_no_affine": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_8": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + }, + strict=False, +) +@common.XfailIfNoCorstone300 +def test_native_group_norm_u55_BI(test_data): + pipeline = EthosU55PipelineBI[input_t]( + test_data[1], + test_data[0], + "torch.ops.aten.sub.Tensor", # 'sub' op arbitrarily chosen to confirm groupnorm was 
decomposed + run_on_fvp=True, + atol=0.1, # TODO: "MLETORCH-925: Fix numerical issue for aten.native_group_norm" + ) + pipeline.change_args("run_method_and_compare_outputs", atol=1, qtol=1) + pipeline.run() + + +@common.parametrize( + "test_data", + test_data_suite, + xfails={ + "randn_1_12_8_6_groups_12": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_1": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_4_no_affine": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + "rand_6_8_10_12_groups_8": "MLETORCH-925: Fix numerical issue for aten.native_group_norm", + }, + strict=False, +) +@common.XfailIfNoCorstone320 +def test_native_group_norm_u85_BI(test_data): + pipeline = EthosU85PipelineBI[input_t]( + test_data[1], + test_data[0], + "torch.ops.aten.sub.Tensor", # 'sub' op arbitrarily chosen to confirm groupnorm was decomposed + run_on_fvp=True, + atol=0.1, # TODO: "MLETORCH-925: Fix numerical issue for aten.native_group_norm" + ) + pipeline.change_args("run_method_and_compare_outputs", atol=1, qtol=1) + pipeline.run() diff --git a/backends/arm/test/ops/test_linalg_vector_norm.py b/backends/arm/test/ops/test_linalg_vector_norm.py index 36533d786dd..27e4bef97e6 100644 --- a/backends/arm/test/ops/test_linalg_vector_norm.py +++ b/backends/arm/test/ops/test_linalg_vector_norm.py @@ -72,7 +72,6 @@ def test_vector_norm_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t](model, input_tensor, aten_op, exir_op) - pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1e-4, rtol=1e-4) pipeline.run() @@ -90,7 +89,6 @@ def test_vector_norm_tosa_BI(test_module): exir_op, symmetric_io_quantization=True, ) - pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1) pipeline.run() @@ -107,13 +105,12 @@ def test_vector_norm_u55_BI_fvp(test_module): run_on_fvp=True, symmetric_io_quantization=True, ) - 
pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1) pipeline.pop_stage("check_not.exir") pipeline.run() @common.parametrize("test_module", test_modules) -@common.XfailIfNoCorstone300 +@common.XfailIfNoCorstone320 def test_vector_norm_u85_BI_fvp(test_module): model, input_tensor = test_module @@ -126,6 +123,5 @@ def test_vector_norm_u85_BI_fvp(test_module): run_on_fvp=True, symmetric_io_quantization=True, ) - pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1) pipeline.pop_stage("check_not.exir") pipeline.run() diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS index 6b2b61729ed..1613cfb28ca 100644 --- a/backends/cadence/aot/TARGETS +++ b/backends/cadence/aot/TARGETS @@ -433,6 +433,7 @@ python_unittest( srcs = [ "tests/test_memory_passes.py", ], + supports_static_listing = False, typing = True, deps = [ ":compiler", @@ -441,7 +442,9 @@ python_unittest( ":pass_utils", "//caffe2:torch", "//executorch/exir:memory", + "fbsource//third-party/pypi/parameterized:parameterized", "//executorch/exir/dialects:lib", + "//executorch/backends/cadence/aot:graph_builder", "//executorch/exir/tests:models", ], ) diff --git a/backends/cadence/aot/memory_constraints.py b/backends/cadence/aot/memory_constraints.py index 3de140e4647..377e6fc81e6 100644 --- a/backends/cadence/aot/memory_constraints.py +++ b/backends/cadence/aot/memory_constraints.py @@ -350,14 +350,28 @@ def is_slice_view(self, node: torch.fx.Node) -> bool: def is_cat_along_outermost_dim( self, graph_module: torch.fx.GraphModule, cat_node: torch.fx.Node ) -> bool: + assert len(cat_node.args) > 0 + cat_tensors = cat_node.args[0] + if not isinstance(cat_tensors, Sequence) or not all( + isinstance(t, torch.fx.Node) for t in cat_tensors + ): + raise ValueError("cat_tensors must be a sequence of torch.fx.Node objects.") + + if len(cat_node.args) > 1: + cat_dim = cat_node.args[1] + else: + cat_dim = cat_node.kwargs.get("dim", None) + if not 
isinstance(cat_dim, int): + raise ValueError("cat_dim must be an integer.") + # If the cat op has default dim, then the concat dim is 0 - if len(cat_node.args) == 1 or cat_node.args[1] == 0: + if len(cat_tensors) == 1 or cat_dim == 0: return True - # Get the concatenation dimension and concatenated tensors - (cat_tensors, cat_dim) = cast( - tuple[Sequence[torch.fx.Node], int], cat_node.args - ) + + # Make sure all dimes before cat_dim are 1. for tensor in cat_tensors: + if not isinstance(tensor, torch.fx.Node): + continue shape = get_shape(graph_module, tensor) if shape is None or not all(dim == 1 for dim in shape[0:cat_dim]): return False diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py index cdaca41569f..4a6edf03c0e 100644 --- a/backends/cadence/aot/ops_registrations.py +++ b/backends/cadence/aot/ops_registrations.py @@ -167,6 +167,13 @@ "where_Scalar.out(Tensor condition, float self, float other, *, Tensor(a!) out) -> Tensor(a!)" ) +lib.define( + "rope(Tensor input, Tensor sin_tensor, Tensor cos_tensor, Tensor? pos) -> (Tensor out)" +) +lib.define( + "rope.out(Tensor input, Tensor sin_tensor, Tensor cos_tensor, Tensor? pos, *, Tensor(a!) 
out) -> Tensor(a!)" +) + # ------------------------------------ # # Migrated from custom_ops.yaml # # ------------------------------------ # @@ -954,3 +961,29 @@ def where_Scalar_meta( other: float, ) -> torch.Tensor: return condition.new_empty(condition.size(), dtype=torch.float32) + + +@register_fake("cadence::rope") +def rope_meta( + input: torch.Tensor, + sin_tensor: torch.Tensor, + cos_tensor: torch.Tensor, + pos: Optional[torch.Tensor], +) -> torch.Tensor: + input_shape = list(input.shape) + assert ( + len(input_shape) in (4, 5) and input_shape[0] == 1 + ), f"input shape {input_shape} must be (1, seq, h, hd) or (1, seq, h, hd / 2, 2)" + seq = input_shape[1] + h = input_shape[2] + hd = prod(input_shape) / (seq * h) + sin_shape = list(sin_tensor.shape) + cos_shape = list(cos_tensor.shape) + assert sin_shape == cos_shape, f"{sin_shape=} must be same as {cos_shape}" + assert ( + len(sin_shape) == 2 and sin_shape[-1] == hd // 2 + ), f"{sin_shape=} must be [seq, hd/2]" + assert ( + pos is None or len(pos.shape) == 1 and pos.shape[0] == seq + ), f"{pos.shape} must be [{seq}]" + return input.new_empty(input.shape, dtype=input.dtype) diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py index 358ec1d6a4b..e5a88c10a3f 100644 --- a/backends/cadence/aot/replace_ops.py +++ b/backends/cadence/aot/replace_ops.py @@ -939,8 +939,8 @@ def replace_conv_with_nhwc_conv(self, graph_module: torch.fx.GraphModule): # This pass needs to be reworked to be compatible with PT2. It is an optimization # pass anyway, so move it to opt level 2. -# TODO(matthiascremon): update and improve this pass. -@register_cadence_pass(CadencePassAttribute(opt_level=2)) +# TODO: T213724613 update and improve this pass. 
+# @register_cadence_pass(CadencePassAttribute(opt_level=2)) class ReplaceConvWithChannelLastConvPass(ExportPass): """ Replace the ATen convolution op with custom conv op with NCHW or NHWC layout @@ -2065,11 +2065,10 @@ def call_operator( return super().call_operator(op, args, kwargs, meta) -@register_cadence_pass(CadencePassAttribute(opt_level=2)) -class ReplaceGeluWithApproximateGeluPass(ExportPass): +@register_cadence_pass(CadencePassAttribute(opt_level=0)) +class ReplaceAtenApproxGeluWithApproxGeluPass(ExportPass): """ - Replace the gelu op with an approximate gelu op. The approximate gelu op - is more efficient on DSP backends. + Replace the aten gelu op with an approximate arg with an approximate gelu op. """ def call_operator( @@ -2079,6 +2078,9 @@ def call_operator( kwargs: Dict[str, Argument], meta: NodeMetadata, ) -> ProxyValue: + if "approximate" not in kwargs: + return super().call_operator(op, args, kwargs, meta) + if op not in { exir_ops.edge.aten.gelu.default, }: @@ -2414,7 +2416,7 @@ class CadenceReplaceOpsInGraph: ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass, ReplaceAtenAvgPoolWithJarvisAvgPoolPass, ReplaceWhereWithFullArgsWithWhereScalar, - ReplaceGeluWithApproximateGeluPass, + ReplaceAtenApproxGeluWithApproxGeluPass, ReplaceSplitWithSlicePass, ReplacePowWithMulPass, ] diff --git a/backends/cadence/aot/tests/test_memory_passes.py b/backends/cadence/aot/tests/test_memory_passes.py index c32809c2bff..d220007e227 100644 --- a/backends/cadence/aot/tests/test_memory_passes.py +++ b/backends/cadence/aot/tests/test_memory_passes.py @@ -14,13 +14,23 @@ import executorch.backends.cadence.aot.ops_registrations # noqa import torch from executorch.backends.cadence.aot import compiler -from executorch.backends.cadence.aot.memory_planning import find_peak_memory_usage +from executorch.backends.cadence.aot.graph_builder import GraphBuilder +from executorch.backends.cadence.aot.memory_planning import ( + CadenceMemoryPlanning, + 
find_peak_memory_usage, +) from executorch.backends.cadence.aot.pass_utils import count_node -from executorch.backends.cadence.aot.utils import MemoryConfig +from executorch.backends.cadence.aot.utils import ( + get_default_memory_config, + MemoryConfig, +) from executorch.exir import memory from executorch.exir.dialects._ops import ops as exir_ops from executorch.exir.memory_planning import collect_specs_from_nodes +from executorch.exir.passes.spec_prop_pass import SpecPropPass from executorch.exir.tests.models import MultiLayerPerceptron +from parameterized.parameterized import parameterized +from torch.fx import GraphModule class TestMemPlanningPasses(unittest.TestCase): @@ -120,24 +130,27 @@ def forward(self, x): class TestMemTransform(unittest.TestCase): def _verify_cat_nop_memory_alloc(self, node: torch.fx.Node) -> None: - spec = node.meta.get("spec", None) - self.assertIsNotNone(spec) - dim: int = cast(int, node.args[1]) if len(node.args) > 1 else 0 - outer_size = math.prod(spec.shape[:dim]) + node_spec = node.meta.get("spec", None) + self.assertIsNotNone(node_spec) + dim: int = cast(int, node.kwargs["dim"]) if "dim" in node.kwargs else 0 + outer_size = math.prod(node_spec.shape[:dim]) self.assertEqual( outer_size, 1, f"{node=} has wrong outer size: {outer_size=}, expected 1.", ) - inner_dim_elements = math.prod(spec.shape[dim + 1 :]) * spec.dtype.itemsize + inner_dim_elements = ( + math.prod(node_spec.shape[dim + 1 :]) * node_spec.dtype.itemsize + ) dim_offset = 0 for arg in cast(list[torch.fx.Node], node.args[0]): arg_spec = arg.meta.get("spec", None) - self.assertEqual(arg_spec.mem_id, spec.mem_id) + self.assertEqual(arg_spec.mem_id, node_spec.mem_id) + actual_offset = node_spec.mem_offset + dim_offset * inner_dim_elements self.assertEqual( arg_spec.mem_offset, - spec.mem_offset + dim_offset * inner_dim_elements, - f"{arg=} for node {node=} has wrong memory offset: {arg_spec.mem_offset=} {dim_offset=} for cat on {dim=}, but output has {spec.mem_offset=}", 
+ actual_offset, + f"{arg=} of node {node=} has wrong memory offset: expected {arg_spec.mem_offset=}, but got {actual_offset=} = {node_spec.mem_offset=} + {dim_offset=} * {inner_dim_elements=}", ) dim_offset += arg_spec.shape[dim] @@ -209,23 +222,45 @@ def verify_nop_memory_alloc(self, graph_module: torch.fx.GraphModule) -> None: ): self._verify_select_nop_memory_alloc(node) - def test_optimize_cat_on_placeholders(self) -> None: - class Cat(torch.nn.Module): - def forward(self, x, y): - return torch.ops.aten.cat((x, y)) - - x = torch.ones(3, 6) - y = torch.ones(2, 6) - # Optimizing cat ops is only at opt_level 2+, and requires the memory planning - # pass to run: - graph_module = ( - compiler.export_to_executorch_gen_etrecord( - Cat(), (x, y), opt_level=2, mem_algo=1 - ) - .exported_program() - .graph_module - ) - logging.info(f"graph_module: {graph_module.print_readable(print_output=False)}") + # Initializes the nodes metadata and runs the GenerateMemoryViewConstraints, + # GenerateSliceAndSelectNopConstraints, and GenerateCatNopConstraints passes. 
+ def run_memory_planning(self, original, alloc_graph_input=True) -> GraphModule: + graph_module = SpecPropPass().call(original).graph_module + return CadenceMemoryPlanning( + get_default_memory_config(), + opt_level=2, + mem_algo=1, # greedy_by_size_for_offset_calculation_with_hierarchy + alloc_graph_input=alloc_graph_input, + )(graph_module).graph_module + + @parameterized.expand( + [ + [ + [3, 6], # x_shape + [2, 6], # y_shape + 0, # concat dim + ], + ] + ) + def test_optimize_cat_on_placeholders(self, x_shape, y_shape, concat_dim) -> None: + concat_shape = [x_shape[concat_dim] + y_shape[concat_dim], x_shape[1]] + builder = GraphBuilder() + x = builder.placeholder("x", torch.ones(*x_shape)) + y = builder.placeholder("y", torch.ones(*y_shape)) + pre_created_output = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(concat_shape, 0.0), + kwargs={"dtype": torch.float32}, + ) + graph_output = builder.call_operator( + op=torch.ops.aten.cat.out, + args=([x, y],), + kwargs={"dim": concat_dim, "out": pre_created_output}, + ) + builder.output([graph_output]) + original = builder.get_graph_module() + + graph_module = self.run_memory_planning(original) graph_module.graph.eliminate_dead_code() # Assert that cat op is optimized away self.assertEqual(count_node(graph_module, torch.ops.aten.cat.out), 0) @@ -233,53 +268,88 @@ def forward(self, x, y): self.assertEqual(count_node(graph_module, torch.ops.aten._cat_nop.out), 1) self.verify_nop_memory_alloc(graph_module) - def test_optimize_cat_outermost(self) -> None: - class OptimizeCatFeasible1(torch.nn.Module): - def forward(self, x, y): - x1 = torch.add(x, 2.4, 3.1) - y1 = torch.add(y, 1, 2) - # Cat along the outermost dimension can be optimized away after - # adding constraints on the locations of x1 and y1. 
- return torch.ops.aten.cat((x1, y1)) - - x = torch.ones(3, 6) - y = torch.ones(2, 6) - # Optimizing cat ops is only at opt_level 2+, and requires the memory planning - # pass to run: - graph_module = ( - compiler.export_to_executorch_gen_etrecord( - OptimizeCatFeasible1(), (x, y), opt_level=2, mem_algo=1 + # Returns a GraphModule with the following structure: + # "add_add_cat_model" : cat(x + 123, y + 456) + # "add_add_cat_add_model": cat(x + 123, y + 456) + 789 + def get_graph_module( + self, model_name, x_shape, y_shape, concated_shape, concat_dim + ) -> GraphModule: + builder = GraphBuilder() + x = builder.placeholder("x", torch.ones(*x_shape, dtype=torch.float32)) + y = builder.placeholder("y", torch.ones(*y_shape, dtype=torch.float32)) + to_add_to_x = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(x_shape, 123.0), + kwargs={"dtype": torch.float32}, + ) + add_x = builder.call_operator( + op=exir_ops.edge.aten.add.Tensor, + args=(x, to_add_to_x), + ) + to_add_to_y = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(y_shape, 456.0), + kwargs={"dtype": torch.float32}, + ) + add_y = builder.call_operator( + op=exir_ops.edge.aten.add.Tensor, + args=(y, to_add_to_y), + ) + pre_created_output = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(concated_shape, 0.0), + kwargs={"dtype": torch.float32}, + ) + cat = builder.call_operator( + op=torch.ops.aten.cat.out, + args=([add_x, add_y],), + kwargs={"dim": concat_dim, "out": pre_created_output}, + ) + if model_name == "add_add_cat_model": + builder.output([cat]) + return builder.get_graph_module() + + if model_name == "add_add_cat_add_model": + to_add_to_cat = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(concated_shape, 789.0), + kwargs={"dtype": torch.float32}, ) - .exported_program() - .graph_module - ) - graph_module.graph.eliminate_dead_code() - # Assert that cat op is optimized away - self.assertEqual(count_node(graph_module, 
torch.ops.aten.cat.out), 0) - # Assert that cat op is replaced by its nop version post optimization - self.assertEqual(count_node(graph_module, torch.ops.aten._cat_nop.out), 1) - self.verify_nop_memory_alloc(graph_module) - - def test_optimize_cat_non_outermost(self) -> None: - class OptimizeCatFeasible2(torch.nn.Module): - def forward(self, x, y): - x1 = torch.add(x, 2.4, 3.1) - y1 = torch.add(y, 1, 2) - # Cat along the outermost dimension can be optimized away after - # adding constraints on the locations of x1 and y1. - return torch.ops.aten.cat((x1, y1), 1) - - x = torch.ones(1, 3, 6) - y = torch.ones(1, 2, 6) - # Optimizing cat ops is only at opt_level 2+, and requires the memory planning - # pass to run: - graph_module = ( - compiler.export_to_executorch_gen_etrecord( - OptimizeCatFeasible2(), (x, y), opt_level=2, mem_algo=1 + graph_output = builder.call_operator( + op=exir_ops.edge.aten.add.Tensor, + args=(cat, to_add_to_cat), ) - .exported_program() - .graph_module - ) + builder.output([graph_output]) + return builder.get_graph_module() + + raise ValueError(f"Unknown model name {model_name}") + + @parameterized.expand( + [ + ( + "outermost", + [3, 6], # x_shape + [2, 6], # y_shape + [5, 6], # concated_shape + 0, # concat dim + ), + ( + "non_outermost", + [1, 3, 6], # x_shape + [1, 2, 6], # y_shape + [1, 5, 6], # concated_shape + 1, # concat dim + ), + ], + name_func=lambda f, _, param: f"{f.__name__}_{param.args[0]}", + ) + def test_cat_optimized( + self, _, x_shape, y_shape, concated_shape, concat_dim + ) -> None: + original = self.get_graph_module( + "add_add_cat_model", x_shape, y_shape, concated_shape, concat_dim + ) + graph_module = self.run_memory_planning(original) graph_module.graph.eliminate_dead_code() # Assert that cat op is optimized away self.assertEqual(count_node(graph_module, torch.ops.aten.cat.out), 0) @@ -287,111 +357,181 @@ def forward(self, x, y): self.assertEqual(count_node(graph_module, torch.ops.aten._cat_nop.out), 1) 
self.verify_nop_memory_alloc(graph_module) - def test_no_optimize_cat_non_outermost(self) -> None: - class OptimizeCatInfeasible1(torch.nn.Module): - def forward(self, x, y): - x1 = torch.add(x, 2.4, 3.1) - y1 = torch.add(y, 1, 2) - # Cat along the outermost dimension can be optimized away after - # adding constraints on the locations of x1 and y1. - return torch.ops.aten.cat((x1, y1), 1) - - x = torch.ones(2, 4, 5) - y = torch.ones(2, 2, 5) - # Optimizing cat ops is only at opt_level 2+, and requires the memory planning - # pass to run - graph_module = ( - compiler.export_to_executorch_gen_etrecord( - OptimizeCatInfeasible1(), (x, y), opt_level=2, mem_algo=1 - ) - .exported_program() - .graph_module - ) + @parameterized.expand( + [ + ( + "non_outermost", + [2, 4, 5], # x_shape + [2, 2, 5], # y_shape + [2, 6, 5], # concated_shape + 1, # concat dim + ), + ], + name_func=lambda f, _, param: f"{f.__name__}_{param.args[0]}", + ) + def test_cat_not_optimized( + self, _, x_shape, y_shape, concated_shape, concat_dim + ) -> None: + original = self.get_graph_module( + "add_add_cat_model", x_shape, y_shape, concated_shape, concat_dim + ) + graph_module = self.run_memory_planning(original) graph_module.graph.eliminate_dead_code() - # Assert that cat op is not optimized away, since the concat is not - # along the outermost dim + # Assert that cat op is not optimized away, since the concat is not along the outermost dim. + # The first dimension is 2, but all dims before cat_dim should be == 1. self.assertEqual(count_node(graph_module, torch.ops.aten.cat.out), 1) self.verify_nop_memory_alloc(graph_module) - def test_no_optimize_cat_non_outermost1(self) -> None: - class OptimizeCatInfeasible2(torch.nn.Module): - def forward(self, x, y): - x1 = torch.add(x, 2.4, 3.1) - y1 = torch.add(y, 1, 2) - # Cat along the outermost dimension can be optimized away after - # adding constraints on the locations of x1 and y1. 
- return torch.ops.aten.cat((x1, y1), 0) + 2 + @parameterized.expand( + [ + ( + "aligned", + [5, 8], # x_shape + [3, 8], # y_shape + [8, 8], # concated_shape + 0, # concat dim + 0, # expected cat nodes + ), + ( + "unaligned", # 5 * 5 * 4 % 8 != 0 + [5, 5], # x_shape + [3, 5], # y_shape + [8, 5], # concated_shape + 0, # concat dim + 1, # expected cat nodes + ), + ], + name_func=lambda f, _, param: f"{f.__name__}_{param.args[0]}", + ) + def test_cat_not_graph_output( + self, _, x_shape, y_shape, concated_shape, concat_dim, expected_cat_nodes + ) -> None: + original = self.get_graph_module( + "add_add_cat_add_model", x_shape, y_shape, concated_shape, concat_dim + ) + graph_module = self.run_memory_planning(original) + graph_module.graph.eliminate_dead_code() - x = torch.ones(5, 5) - y = torch.ones(3, 5) - # Optimizing cat ops is only at opt_level 2+, and requires the memory planning - # pass to run: - graph_module = ( - compiler.export_to_executorch_gen_etrecord( - OptimizeCatInfeasible2(), (x, y), opt_level=2, mem_algo=1 - ) - .exported_program() - .graph_module + # Assert that cat op is optimized away only if its arguments offsets are multiple of 8 bytes. + self.assertEqual( + count_node(graph_module, torch.ops.aten.cat.out), expected_cat_nodes ) - graph_module.graph.eliminate_dead_code() - # Assert that cat op is not optimized away, since the concat relative - # offsets are not multiple of 8 bytes, and the cat is not the output - # of the graph. - self.assertEqual(count_node(graph_module, torch.ops.aten.cat.out), 1) self.verify_nop_memory_alloc(graph_module) def test_optimize_cat_with_slice(self) -> None: - class OptimizeCatSliceFeasible(torch.nn.Module): - def forward(self, x): - x1 = torch.add(x, 2.4, 3.1) - x2 = torch.ops.aten.slice(x, 0, 0, 1) - x3 = torch.ops.aten.cat((x1, x2)) - return torch.add(x3, x3) - - x = torch.randn(5, 6) - # Compile, and set alloc_graph_input to False so that slice op is not - # optimized away. 
- # Optimizing cat ops is only at opt_level 2+, and requires the memory planning - # pass to run: - graph_module = ( - compiler.export_to_executorch_gen_etrecord( - OptimizeCatSliceFeasible(), - (x,), - opt_level=2, - mem_algo=1, - alloc_graph_input=False, - ) - .exported_program() - .graph_module - ) + x_shape = [5, 6] + concated_shape = [6, 6] + concat_dim = 0 + builder = GraphBuilder() + x = builder.placeholder("x", torch.ones(*x_shape, dtype=torch.float32)) + to_add_to_x = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(x_shape, 123.0), + kwargs={"dtype": torch.float32}, + ) + add_x = builder.call_operator( + op=exir_ops.edge.aten.add.Tensor, + args=(x, to_add_to_x), + ) + slice_x = builder.call_operator( + op=exir_ops.edge.aten.slice.Tensor, + args=( + x, + 0, # dim + 0, # start + 1, # end + 1, # step + ), + ) + pre_created_output = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(concated_shape, 0.0), + kwargs={"dtype": torch.float32}, + ) + cat = builder.call_operator( + op=torch.ops.aten.cat.out, + args=([add_x, slice_x],), + kwargs={"dim": concat_dim, "out": pre_created_output}, + ) + graph_output = builder.call_operator( + op=exir_ops.edge.aten.add.Tensor, + args=(cat, cat), + ) + builder.output([graph_output]) + original = builder.get_graph_module() + + graph_module = self.run_memory_planning(original, alloc_graph_input=False) graph_module.graph.eliminate_dead_code() - # Assert that cat op is optimized away + + # Assert that cat op is optimized away. + self.assertEqual(count_node(graph_module, torch.ops.aten.cat.out), 0) + # Assert that cat op is replaced by its nop version post optimization. self.assertEqual(count_node(graph_module, torch.ops.aten._cat_nop.out), 1) + # Assert that slice op was not optimized away. 
+ self.assertEqual(count_node(graph_module, exir_ops.edge.aten.slice.Tensor), 1) self.verify_nop_memory_alloc(graph_module) def test_optimize_cat_with_slice_infeasible(self) -> None: - class OptimizeCatSliceInfeasible(torch.nn.Module): - def forward(self, x, y): - x1 = torch.add(x, 2.4, 3.1) - y1 = torch.add(y, 1, 2) - y2 = torch.ops.aten.slice(y1, 0, 0, 1) - # Cat can't be optimized away if any of the tensor (e.g., y1) - # is slice_nop - return torch.ops.aten.cat((y2, x1)) - - x = torch.ones(3, 5) - y = torch.ones(2, 5) - # Optimizing cat ops is only at opt_level 2+, and requires the memory planning - # pass to run: - graph_module = ( - compiler.export_to_executorch_gen_etrecord( - OptimizeCatSliceInfeasible(), (x, y), opt_level=2, mem_algo=1 - ) - .exported_program() - .graph_module - ) + x_shape = [5, 6] + y_shape = [3, 6] + concated_shape = [8, 6] + concat_dim = 0 + builder = GraphBuilder() + x = builder.placeholder("x", torch.ones(*x_shape, dtype=torch.float32)) + y = builder.placeholder("y", torch.ones(*y_shape, dtype=torch.float32)) + to_add_to_x = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(x_shape, 123.0), + kwargs={"dtype": torch.float32}, + ) + add_x = builder.call_operator( + op=exir_ops.edge.aten.add.Tensor, + args=(x, to_add_to_x), + ) + to_add_to_y = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(y_shape, 123.0), + kwargs={"dtype": torch.float32}, + ) + add_y = builder.call_operator( + op=exir_ops.edge.aten.add.Tensor, + args=(y, to_add_to_y), + ) + slice_out = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(y_shape, 0.0), + kwargs={"dtype": torch.float32}, + ) + slice_y = builder.call_operator( + op=torch.ops.aten.slice_copy.Tensor_out, + args=( + add_y, + 0, # dim + 0, # start + 1, # end + 1, # step + ), + kwargs={"out": slice_out}, + ) + pre_created_output = builder.call_operator( + op=exir_ops.edge.aten.full.default, + args=(concated_shape, 0.0), + kwargs={"dtype": 
torch.float32}, + ) + cat = builder.call_operator( + op=torch.ops.aten.cat.out, + args=([slice_y, add_x],), + kwargs={"dim": concat_dim, "out": pre_created_output}, + ) + builder.output([cat]) + original = builder.get_graph_module() + graph_module = self.run_memory_planning(original, alloc_graph_input=False) graph_module.graph.eliminate_dead_code() - # Assert that cat op is not optimized away + # # Assert that slice op is optimized away. + self.assertEqual( + count_node(graph_module, torch.ops.aten._slice_copy_nop.Tensor_out), 1 + ) + # # Assert that cat op is not optimized away self.assertEqual(count_node(graph_module, torch.ops.aten.cat.out), 1) self.verify_nop_memory_alloc(graph_module) diff --git a/backends/cadence/aot/tests/test_replace_ops_passes.py b/backends/cadence/aot/tests/test_replace_ops_passes.py index e7bf8e9cefa..e8215c378f9 100644 --- a/backends/cadence/aot/tests/test_replace_ops_passes.py +++ b/backends/cadence/aot/tests/test_replace_ops_passes.py @@ -26,13 +26,13 @@ ForceChannelLastForConvPass, MakeSliceAndCatDimOutermostPass, ReplaceAddMMWithLinearPass, + ReplaceAtenApproxGeluWithApproxGeluPass, ReplaceAtenConvolutionWithJarvisConvolutionPass, ReplaceConstantPadNdWithSlicePass, ReplaceConvolutionOptionalArgsWithConcreteArgsPass, ReplaceConvWithIm2RowAndLinear, ReplaceEmptyTensorsWithFullPass, ReplaceFunctionallyEquivalentOpTargets, - ReplaceGeluWithApproximateGeluPass, ReplaceIm2RowWithViewPass, ReplaceLinearWithFullyConnectedOpPass, ReplaceMatmulWithTransposedMatmulPass, @@ -1287,17 +1287,41 @@ def forward(self, cond: torch.Tensor): 1, ) - def test_replace_aten_gelu_with_approximate_gelu(self): - class Gelu(torch.nn.Module): - def forward(self, input): - return torch.nn.functional.gelu(input) + def test_no_replace_aten_gelu_with_approximate_gelu(self): + inputs = torch.randn(2, 1, 64) + + gm = single_op_builder( + placeholders=(inputs,), + op=exir_ops.edge.aten.gelu.default, + args=(inputs,), + ) + gm = ExportPass().call(gm).graph_module + + p 
= ReplaceAtenApproxGeluWithApproxGeluPass() + graph_after_passes = p.call(gm).graph_module + # Assert that aten.gelu op was not decomposed, since it didn't have an approximate argument + self.assertEqual( + count_node( + graph_after_passes, + exir_ops.edge.aten.gelu.default, + ), + 1, + ) + + def test_replace_aten_approximate_gelu_with_approximate_gelu(self): inputs = torch.randn(2, 1, 64) - graph_module = export_to_edge(Gelu(), (inputs,)).exported_program().graph_module + gm = single_op_builder( + placeholders=(inputs,), + op=exir_ops.edge.aten.gelu.default, + args=(inputs,), + kwargs={"approximate": "tanh"}, + ) + gm = ExportPass().call(gm).graph_module - p = ReplaceGeluWithApproximateGeluPass() - graph_after_passes = cast(PassResult, p(graph_module)).graph_module + p = ReplaceAtenApproxGeluWithApproxGeluPass() + graph_after_passes = p.call(gm).graph_module # Assert that aten.gelu op was decomposed self.assertEqual( diff --git a/backends/cadence/utils/facto_util.py b/backends/cadence/utils/facto_util.py index 8cd57059244..b896f8a8e89 100644 --- a/backends/cadence/utils/facto_util.py +++ b/backends/cadence/utils/facto_util.py @@ -20,8 +20,8 @@ MAX_CASES = 50 -def apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> None: - additional_tensor_constraints = [ +def apply_tensor_contraints(op_name: str, index: int) -> list[object]: + tensor_constraints = [ cp.Dtype.In(lambda deps: [torch.int, torch.float]), cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]), cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), @@ -33,17 +33,28 @@ def apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> N match op_name: case "where.self": - additional_tensor_constraints = [ - cp.Dtype.In(lambda deps: [torch.float, torch.int, torch.bool]), - cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]), - cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), - cp.Value.Le(lambda deps, dtype, struct: 2**4), - cp.Rank.Ge(lambda deps: 1), - 
cp.Size.Ge(lambda deps, r, d: 1), - cp.Size.Le(lambda deps, r, d: 2**9), - ] + if index == 0: # condition + tensor_constraints = [ + cp.Dtype.In(lambda deps: [torch.bool]), + cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]), + cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), + cp.Value.Le(lambda deps, dtype, struct: 2**4), + cp.Rank.Ge(lambda deps: 1), + cp.Size.Ge(lambda deps, r, d: 1), + cp.Size.Le(lambda deps, r, d: 2**9), + ] + else: + tensor_constraints = [ + cp.Dtype.In(lambda deps: [torch.float, torch.int]), + cp.Dtype.NotIn(lambda deps: [torch.int64, torch.float64]), + cp.Value.Ge(lambda deps, dtype, struct: -(2**4)), + cp.Value.Le(lambda deps, dtype, struct: 2**4), + cp.Rank.Ge(lambda deps: 1), + cp.Size.Ge(lambda deps, r, d: 1), + cp.Size.Le(lambda deps, r, d: 2**9), + ] case "sigmoid.default": - additional_tensor_constraints.extend( + tensor_constraints.extend( [ cp.Dtype.In(lambda deps: [torch.float]), cp.Rank.Le(lambda deps: 2**2), @@ -52,7 +63,7 @@ def apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> N ] ) case "rsqrt.default": - additional_tensor_constraints.extend( + tensor_constraints.extend( [ cp.Dtype.In(lambda deps: [torch.float]), cp.Rank.Le(lambda deps: 2**2), @@ -63,14 +74,14 @@ def apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> N ] ) case "mean.dim": - additional_tensor_constraints.extend( + tensor_constraints.extend( [ cp.Dtype.In(lambda deps: [torch.float]), cp.Rank.Le(lambda deps: 2**2), ] ) case "exp.default": - additional_tensor_constraints.extend( + tensor_constraints.extend( [ cp.Rank.Le(lambda deps: 2**3), cp.Value.Ge(lambda deps, dtype, struct: -(2**2)), @@ -78,7 +89,7 @@ def apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> N ] ) case "slice_copy.Tensor": - additional_tensor_constraints.extend( + tensor_constraints.extend( [ cp.Rank.Le(lambda deps: 2), cp.Value.Ge(lambda deps, dtype, struct: 1), @@ -86,12 +97,12 @@ def 
apply_tensor_contraints(op_name: str, tensor_constraints: list[object]) -> N ] ) case _: - additional_tensor_constraints.extend( + tensor_constraints.extend( [ cp.Rank.Le(lambda deps: 2**2), ] ) - tensor_constraints.extend(additional_tensor_constraints) + return tensor_constraints def apply_scalar_contraints(op_name: str) -> list[ScalarDtype]: @@ -107,9 +118,6 @@ def apply_scalar_contraints(op_name: str) -> list[ScalarDtype]: def facto_testcase_gen(op_name: str) -> List[Tuple[List[str], OrderedDict[str, str]]]: # minimal example to test add.Tensor using FACTO spec = SpecDictDB[op_name] - tensor_constraints = [] - # common tensor constraints - apply_tensor_contraints(op_name, tensor_constraints) for index, in_spec in enumerate(copy.deepcopy(spec.inspec)): if in_spec.type.is_scalar(): @@ -142,7 +150,9 @@ def facto_testcase_gen(op_name: str) -> List[Tuple[List[str], OrderedDict[str, s ] ) elif in_spec.type.is_tensor(): - spec.inspec[index].constraints.extend(tensor_constraints) + spec.inspec[index].constraints.extend( + apply_tensor_contraints(op_name, index) + ) elif in_spec.type.is_dim_list(): spec.inspec[index].constraints.extend( [ diff --git a/backends/qualcomm/builders/op_avg_pool2d.py b/backends/qualcomm/builders/op_avg_pool2d.py index f4762e8bb5a..6892e7326f6 100644 --- a/backends/qualcomm/builders/op_avg_pool2d.py +++ b/backends/qualcomm/builders/op_avg_pool2d.py @@ -23,6 +23,12 @@ class AvgPool2d(NodeVisitor): def __init__(self, *args) -> None: super().__init__(*args) + def _get_filter_size(self, node): + filter_size = cast(List[int], node.args[1]) + if len(filter_size) == 1: + filter_size = filter_size + filter_size + return filter_size + def define_node( self, node: torch.fx.Node, @@ -46,31 +52,44 @@ def define_node( PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE, nodes_to_wrappers, ) + + pt_ceil_mode = node.args[4] if len(node.args) > 4 else False + # kernel info - filter_size = cast(List[int], node.args[1]) - if len(filter_size) == 1: - filter_size 
= filter_size + filter_size + input_shape = input_node.meta["val"].shape + input_h, input_w = input_shape[2], input_shape[3] + filter_size = self._get_filter_size(node) + if pt_ceil_mode: + # filter_size might larger than input_h, input_w, use min of them + filter_size = [min(filter_size[0], input_h), min(filter_size[1], input_w)] filter_size_shape = [len(filter_size)] - # stride info - default to kernel_size if not given - stride = cast(List[int], node.args[2]) if len(node.args) > 2 else filter_size - if len(stride) == 1: - stride = stride + stride - stride_shape = [len(stride)] - padding = [0, 0] if len(node.args) > 3: padding = cast(List[int], node.args[3]) if len(padding) == 1: padding = padding + padding + if pt_ceil_mode: + ori_filter_h, ori_filter_w = self._get_filter_size(node) + padding = [ + 0 if ori_filter_h > input_h else padding[0], + 0 if ori_filter_w > input_w else padding[1], + ] + padding_shape = [len(padding), len(padding)] # if ceil mode is True, use ceil instead of floor to compute the output shape - mode = OpPoolAvg2d.RoundingMode.FLOOR - if len(node.args) > 4: - ceil_mode = cast(bool, node.args[4]) - if ceil_mode: - mode = OpPoolAvg2d.RoundingMode.CEIL + mode = ( + OpPoolAvg2d.RoundingMode.CEIL + if pt_ceil_mode + else OpPoolAvg2d.RoundingMode.FLOOR + ) + + # stride info - default to kernel_size if not given + stride = cast(List[int], node.args[2]) if len(node.args) > 2 else filter_size + if len(stride) == 1: + stride = stride + stride + stride_shape = [len(stride)] count_include_pad = True if len(node.args) > 5: diff --git a/backends/qualcomm/quantizer/annotators.py b/backends/qualcomm/quantizer/annotators.py index 730bdaf47d0..5195cf39f33 100644 --- a/backends/qualcomm/quantizer/annotators.py +++ b/backends/qualcomm/quantizer/annotators.py @@ -967,6 +967,7 @@ def annotate_cdist(node: Node, quantization_config: QuantizationConfig) -> None: @register_annotator( [ torch.ops.aten.conv2d.default, + torch.ops.aten.conv2d.padding, 
torch.ops.aten.conv1d.default, torch.ops.aten.conv_transpose2d.input, torch.ops.aten.conv_transpose1d.default, diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py index 23f9e8fd79c..053bef79d1f 100644 --- a/backends/qualcomm/tests/models.py +++ b/backends/qualcomm/tests/models.py @@ -147,12 +147,13 @@ def forward(self, x, y): class AvgPoolModule(torch.nn.Module): - def __init__(self): + def __init__(self, kernel_size, stride, padding, ceil_mode): super().__init__() self.avgPool = torch.nn.AvgPool2d( - kernel_size=(2, 2), - padding=(1, 1), - stride=(1, 1), + kernel_size=kernel_size, + stride=stride, + padding=padding, + ceil_mode=ceil_mode, count_include_pad=False, ) @@ -1268,6 +1269,14 @@ def forward(self, x): return x.repeat(1, 2, 3, 4) +class ReWriteObs(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return torch.nn.functional.relu(x).expand(3, 4) + + class Reshape(torch.nn.Module): def __init__(self): super().__init__() diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 3616329d32a..031b7244a64 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -49,6 +49,7 @@ generate_qnn_executorch_compiler_spec, PyQnnManagerAdaptor, QnnPartitioner, + rewrite_prepared_observer, skip_annotation, to_edge_transform_and_lower_to_qnn, update_spill_fill_size, @@ -163,9 +164,19 @@ def test_qnn_backend_argmin(self): self.lower_module_and_test_output(module, sample_input) def test_qnn_backend_avg_pool2d(self): - module = AvgPoolModule() # noqa: F405 - sample_input = (torch.randn(1, 3, 2, 2),) - self.lower_module_and_test_output(module, sample_input) + modules = [ + AvgPoolModule((2, 2), (1, 1), (1, 1), False), # noqa: F405 + AvgPoolModule((1280, 1280), (1280, 1280), (0, 0), True), # noqa: F405 + AvgPoolModule((1280, 1280), (1280, 1280), (320, 320), True), # noqa: F405 + ] # noqa: F405 + 
sample_inputs = [ + (torch.randn(1, 3, 2, 2),), + (torch.randn(1, 1280, 7, 7),), + (torch.randn(1, 1280, 7, 7),), + ] + for i, module in enumerate(modules): + with self.subTest(i=i): + self.lower_module_and_test_output(module, sample_inputs[i]) def test_qnn_backend_batch_norm(self): modules = [BatchNorm(32), BatchNorm(32, False)] # noqa: F405 @@ -1271,10 +1282,20 @@ def test_qnn_backend_argmin(self): self.lower_module_and_test_output(module, sample_input) def test_qnn_backend_avg_pool2d(self): - module = AvgPoolModule() # noqa: F405 - sample_input = (torch.randn(1, 3, 2, 2),) - module = self.get_qdq_module(module, sample_input) - self.lower_module_and_test_output(module, sample_input) + modules = [ + AvgPoolModule((2, 2), (1, 1), (1, 1), False), # noqa: F405 + AvgPoolModule((1280, 1280), (1280, 1280), (0, 0), True), # noqa: F405 + AvgPoolModule((1280, 1280), (1280, 1280), (320, 320), True), # noqa: F405 + ] # noqa: F405 + sample_inputs = [ + (torch.randn(1, 3, 2, 2),), + (torch.randn(1, 1280, 7, 7),), + (torch.randn(1, 1280, 7, 7),), + ] + for i, module in enumerate(modules): + with self.subTest(i=i): + module = self.get_qdq_module(module, sample_inputs[i]) + self.lower_module_and_test_output(module, sample_inputs[i]) def test_qnn_backend_batch_norm(self): modules = [BatchNorm(32), BatchNorm(32, False)] # noqa: F405 @@ -3038,6 +3059,36 @@ def test_qnn_backend_dynamic_shape(self): check_io_shape=True, ) + def test_qnn_backend_rewrite_prepared_observer(self): + from torchao.quantization.pt2e import FixedQParamsObserver + + module = ReWriteObs() # noqa: F405 + sample_input = (torch.randn([3, 1]),) + module = torch.export.export(module, sample_input, strict=True).module() + + quantizer = make_quantizer() + + prepared = prepare_pt2e(module, quantizer) + prepared(*sample_input) + + new_obs = FixedQParamsObserver( + scale=0.004, + zero_point=0, + dtype=torch.uint8, + quant_min=0, + quant_max=255, + qscheme=torch.per_tensor_affine, + ) + + 
rewrite_prepared_observer(prepared, {"activation_post_process_2": new_obs}) + self.assertTrue( + prepared.activation_post_process_1 + == prepared.activation_post_process_2 + == new_obs + ) + quantized_module = convert_pt2e(prepared) + self.lower_module_and_test_output(quantized_module, sample_input) + def test_qnn_backend_skip_node_id_partitioner(self): module = SimpleModel() # noqa: F405 sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28)) @@ -3829,6 +3880,41 @@ def test_conv_former(self): self.assertGreaterEqual(msg["top_1"], 60) self.assertGreaterEqual(msg["top_5"], 80) + def test_deit(self): + if not self.required_envs([self.image_dataset]): + self.skipTest("missing required envs") + cmds = [ + "python", + f"{self.executorch_root}/examples/qualcomm/oss_scripts/deit.py", + "--dataset", + self.image_dataset, + "--artifact", + self.artifact_dir, + "--build_folder", + self.build_folder, + "--device", + self.device, + "--model", + self.model, + "--ip", + self.ip, + "--port", + str(self.port), + ] + if self.host: + cmds.extend(["--host", self.host]) + + p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) + with Listener((self.ip, self.port)) as listener: + conn = listener.accept() + p.communicate() + msg = json.loads(conn.recv()) + if "Error" in msg: + self.fail(msg["Error"]) + else: + self.assertGreaterEqual(msg["top_1"], 75) + self.assertGreaterEqual(msg["top_5"], 90) + def test_dino_v2(self): if not self.required_envs([self.image_dataset]): self.skipTest("missing required envs") @@ -3864,6 +3950,41 @@ def test_dino_v2(self): self.assertGreaterEqual(msg["top_1"], 70) self.assertGreaterEqual(msg["top_5"], 85) + def test_efficientnet(self): + if not self.required_envs([self.image_dataset]): + self.skipTest("missing required envs") + cmds = [ + "python", + f"{self.executorch_root}/examples/qualcomm/oss_scripts/efficientnet.py" + "--dataset", + self.image_dataset, + "--artifact", + self.artifact_dir, + "--build_folder", + self.build_folder, + 
"--device", + self.device, + "--model", + self.model, + "--ip", + self.ip, + "--port", + str(self.port), + ] + if self.host: + cmds.extend(["--host", self.host]) + + p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL) + with Listener((self.ip, self.port)) as listener: + conn = listener.accept() + p.communicate() + msg = json.loads(conn.recv()) + if "Error" in msg: + self.fail(msg["Error"]) + else: + self.assertGreaterEqual(msg["top_1"], 70) + self.assertGreaterEqual(msg["top_5"], 85) + def test_efficientSAM(self): if not self.required_envs( [self.image_dataset, self.pretrained_weight, self.oss_repo] diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 6dcad7ba00b..2d53f4dc71c 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -5,7 +5,7 @@ # LICENSE file in the root directory of this source tree. import operator import warnings -from collections import OrderedDict +from collections import defaultdict, OrderedDict from typing import Any, Callable, Dict, List, Optional, Tuple, Union import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor @@ -1038,3 +1038,53 @@ def tag_quant_io(gm: torch.fx.GraphModule, get_quant_io_dtype_fn: Callable): for node in gm.graph.nodes: if dtype := get_quant_io_dtype_fn(node): node.meta[QCOM_QUANTIZED_IO] = dtype + + +def rewrite_prepared_observer( + graph_module: torch.fx.GraphModule, name_obs_dict: Dict[str, torch.nn.Module] +): + """ + Rewrite the observer of the specified observer module name in the graph_module. 
+ + Example: + Consider the following graph_module after prepare_pt2e: + gm = prepare_pt2e(gm) + print(gm) + + GraphModule( + (activation_post_process_0): MinMaxObserver(min_val=inf, max_val=-inf) + (activation_post_process_1): MinMaxObserver(min_val=inf, max_val=-inf) + (activation_post_process_2): MinMaxObserver(min_val=inf, max_val=-inf) + (activation_post_process_3): MinMaxObserver(min_val=inf, max_val=-inf) + ) + + new_observer = observer.FixedQParamsObserver( + scale=0.125, + zero_point=42, + dtype=torch.uint8, + quant_min=0, + quant_max=255, + qscheme=torch.per_tensor_affine, + ) + + Calling rewrite_prepared_observer(gm, {"activation_post_process_0": new_observer}) + is equivalent to: + gm.activation_post_process_0 = new_observer + + Note: + If the rewritten observer is a SharedQuantizationSpec, all other shared observers will also be rewritten. + """ + module_name_list = defaultdict(list) + for name, module in graph_module.named_modules(remove_duplicate=False): + module_name_list[module].append(name) + + for name, new_observer in name_obs_dict.items(): + old_module = getattr(graph_module, name, None) + + if not old_module: + print( + f"[WARNING], No observer named as {name} found, please check the moudle name" + ) + continue + for target_name in module_name_list[old_module]: + setattr(graph_module, target_name, new_observer) diff --git a/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py b/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py index b4337829d7f..c415249383e 100644 --- a/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py +++ b/backends/vulkan/_passes/squeeze_unsqueeze_inputs.py @@ -32,7 +32,13 @@ def should_squeeze(self, op, shape: List[int]) -> bool: # pyre-ignore return shape[1] == 1 and shape[0] > 1 if len(shape) == 4: # No need to squeeze if all dims are 1 except the width dim - if all(dim == 1 for dim in shape[:-1]): + if shape[0] == shape[1] == shape[2] == 1: + return False + # No need to squeeze if batch and channel dims are 1 and 
height and width are > 1 + if shape[0] == shape[1] == 1 and shape[2] > 1 and shape[3] > 1: + return False + # No need to squeeze if batch dim is 1 and channel, height and width are > 1 + if shape[0] == 1 and shape[1] > 1 and shape[2] > 1 and shape[3] > 1: return False # Otherwise, check for squeezable dim return 1 in shape[:-1] diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index b32f4eb4308..02df85c33e8 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -499,6 +499,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { compute_graph->encode_prepack(); compute_graph->prepack(); + // TODO(ssjia): remove this once we can batch compile compute pipelines + // during prepare(). compute_graph->encode_execute(); return Error::Ok; @@ -567,9 +569,14 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { } } + // propagate_resize() will re-encode the command buffer so that push + // constants are updated and DynamicDispatchNode can update the compute + // shader, global workgroup size, and local workgroup size to perform the + // model inference. 
if (should_propagate_resize) { compute_graph->propagate_resize(); } + compute_graph->execute(); for (size_t i = 0; i < compute_graph->outputs().size(); i++) { diff --git a/backends/vulkan/runtime/gen_vulkan_spv.py b/backends/vulkan/runtime/gen_vulkan_spv.py index 2b15b2b7d0a..5c59f13fc24 100644 --- a/backends/vulkan/runtime/gen_vulkan_spv.py +++ b/backends/vulkan/runtime/gen_vulkan_spv.py @@ -62,6 +62,7 @@ "uint": "uimage3D", "int8": "iimage3D", "uint8": "uimage3D", + "bool": "uimage3D", }, 2: { "float": "image2D", @@ -70,6 +71,7 @@ "uint": "uimage2D", "int8": "iimage2D", "uint8": "uimage2D", + "bool": "uimage2D", }, }, "SAMPLER_T": { @@ -80,6 +82,7 @@ "uint": "usampler3D", "int8": "isampler3D", "uint8": "usampler3D", + "bool": "usampler3D", }, 2: { "float": "sampler2D", @@ -88,6 +91,7 @@ "uint": "usampler2D", "int8": "isampler2D", "uint8": "usampler2D", + "bool": "usampler2D", }, }, "IMAGE_FORMAT": { @@ -97,6 +101,7 @@ "uint": "rgba32ui", "int8": "rgba8i", "uint8": "rgba8ui", + "bool": "rgba8ui", }, } @@ -115,7 +120,8 @@ def buffer_scalar_type(dtype: str) -> str: return "float16_t" elif dtype[-1] == "8": return dtype + "_t" - + elif dtype == "bool": + return "uint8_t" return dtype @@ -135,17 +141,19 @@ def buffer_gvec_type(dtype: str, n: int) -> str: return f"i8vec{n}" elif dtype == "uint8": return f"u8vec{n}" + elif dtype == "bool": + return f"u8vec{n}" raise AssertionError(f"Invalid dtype: {dtype}") def texel_type(dtype: str) -> str: image_format = TYPE_MAPPINGS["IMAGE_FORMAT"][dtype] - if image_format[-1] == "f": + if image_format[-1:] == "f": return "vec4" - elif image_format[-2] == "ui": + elif image_format[-2:] == "ui": return "uvec4" - elif image_format[-1] == "i": + elif image_format[-1:] == "i": return "ivec4" raise AssertionError(f"Invalid image format: {image_format}") @@ -360,7 +368,7 @@ def define_required_extensions(dtypes: Union[str, List[str]]): elif dtype == "int16" or dtype == "uint16": nbit = "16bit" glsl_type = "int16" - elif dtype == "int8" or 
dtype == "uint8": + elif dtype == "int8" or dtype == "uint8" or dtype == "bool": nbit = "8bit" glsl_type = "int8" diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index a4a6abdd63f..1222a9fc641 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -492,7 +492,7 @@ vkapi::BufferBindInfo ComputeGraph::get_or_create_int_param_buffer( const ValueRef idx) { if (values_.at(idx).isInt()) { const int32_t val = extract_scalar(idx); - create_params_buffer(val); + return create_params_buffer(val); } else if (values_.at(idx).isSymInt()) { SymIntPtr symint = get_symint(idx); return vkapi::BufferBindInfo(symint->gpu_buffer.buffer()); @@ -500,6 +500,16 @@ vkapi::BufferBindInfo ComputeGraph::get_or_create_int_param_buffer( VK_THROW("Cannot create a int param buffer for the given value"); } +vkapi::BufferBindInfo ComputeGraph::get_or_create_int_param_buffer( + const ValueRef idx, + const int32_t default_val) { + if (values_.at(idx).isNone()) { + return create_params_buffer(default_val); + } else { + return get_or_create_int_param_buffer(idx); + } +} + void ComputeGraph::set_symint(const ValueRef idx, const int32_t val) { get_symint(idx)->set(val); } @@ -678,11 +688,12 @@ void ComputeGraph::encode_execute() { } } -void ComputeGraph::execute() const { +void ComputeGraph::execute() { vkapi::VulkanFence fence = context_->fences().get_fence(); context_->submit_cmd_to_gpu(fence.get_submit_handle()); fence.wait(); context_->fences().return_fence(fence); + execute_count_++; } void ComputeGraph::resize_input( @@ -692,10 +703,17 @@ void ComputeGraph::resize_input( get_tensor(io_val.value)->virtual_resize(new_sizes); } +void ComputeGraph::virtual_resize( + const ValueRef idx, + const std::vector& new_sizes) { + get_tensor(idx)->virtual_resize(new_sizes); +} + void ComputeGraph::propagate_resize() { for (std::unique_ptr& node : execute_nodes_) { node->trigger_resize(this); 
} + encode_execute(); } } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 9f4bab3ac04..fe546f26477 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -187,6 +187,7 @@ class ComputeGraph final { protected: size_t values_in_use_ = 0; + size_t execute_count_ = 0; public: // @@ -397,6 +398,19 @@ class ComputeGraph final { std::optional extract_optional_scalar(const ValueRef idx) { if (val_is_none(idx)) { return ::std::nullopt; + } else if (val_is_symint(idx)) { + return utils::safe_downcast(read_symint(idx)); + } else { + return extract_scalar(idx); + } + } + + template + T extract_optional_scalar(const ValueRef idx, const T default_val) { + if (val_is_none(idx)) { + return default_val; + } else if (val_is_symint(idx)) { + return utils::safe_downcast(read_symint(idx)); } else { return extract_scalar(idx); } @@ -608,6 +622,10 @@ class ComputeGraph final { */ vkapi::BufferBindInfo get_or_create_int_param_buffer(const ValueRef idx); + vkapi::BufferBindInfo get_or_create_int_param_buffer( + const ValueRef idx, + const int32_t default_value); + void set_symint(const ValueRef idx, const int32_t val); int32_t read_symint(const ValueRef idx); @@ -745,13 +763,16 @@ class ComputeGraph final { // void encode_execute(); - void execute() const; + void execute(); // // Dynamic Shape support // void resize_input(const int64_t idx, const std::vector& new_sizes); + void virtual_resize( + const ValueRef idx, + const std::vector& new_sizes); void propagate_resize(); // @@ -762,6 +783,10 @@ class ComputeGraph final { return context_->adapter_ptr()->supports_int16_shader_types(); } + inline size_t execute_count() const { + return execute_count_; + } + /* * Check whether the GPU supports 8 bit buffers. 
*/ diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp index 51ff0c122b0..a0d3a4c2e5c 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp @@ -46,15 +46,7 @@ void DispatchNode::encode(ComputeGraph* graph) { std::unique_lock cmd_lock = context->dispatch_lock(); - std::array push_constants_data; - uint32_t push_constants_offset = 0; - - for (const auto& push_constant : push_constants_) { - push_constants_offset += push_constant.write( - push_constants_data.data(), - push_constants_offset, - kMaxPushConstantSize); - } + write_push_constant_data(); context->report_shader_dispatch_start( shader_.kernel_name, @@ -63,7 +55,7 @@ void DispatchNode::encode(ComputeGraph* graph) { node_id_); vkapi::DescriptorSet descriptor_set = context->get_descriptor_set( - shader_, local_workgroup_size_, spec_vars_, push_constants_offset); + shader_, local_workgroup_size_, spec_vars_, push_constants_offset_); uint32_t idx = 0; idx = bind_values_to_descriptor_set( @@ -76,10 +68,20 @@ void DispatchNode::encode(ComputeGraph* graph) { pipeline_barrier, shader_, global_workgroup_size_, - push_constants_data.data(), - push_constants_offset); + push_constants_data_.data(), + push_constants_offset_); context->report_shader_dispatch_end(); } +void DispatchNode::write_push_constant_data() { + push_constants_offset_ = 0; + for (const auto& push_constant : push_constants_) { + push_constants_offset_ += push_constant.write( + push_constants_data_.data(), + push_constants_offset_, + kMaxPushConstantSize); + } +} + } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h index c45f0a741fd..db95adfee39 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h @@ -50,6 +50,12 @@ class DispatchNode : public ExecuteNode { const 
vkapi::SpecVarList spec_vars_; const std::vector push_constants_; + // For push constants + std::array push_constants_data_{}; + uint32_t push_constants_offset_ = 0; + + void write_push_constant_data(); + public: operator bool() const { return shader_; diff --git a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp index ac84916c6fa..a8d2fe2e99d 100644 --- a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp +++ b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp @@ -25,9 +25,9 @@ DynamicDispatchNode::DynamicDispatchNode( const ResizeFunction& resize_fn) : DispatchNode( graph, - pick_shader_fn(&graph, args, resize_args), - pick_global_wg_fn(&graph, args, resize_args), - pick_local_wg_fn(&graph, args, resize_args), + vkapi::ShaderInfo(), + {1u, 1u, 1u}, + {1u, 1u, 1u}, args, params, push_constants, @@ -36,13 +36,57 @@ DynamicDispatchNode::DynamicDispatchNode( resize_fn), pick_shader_fn_(pick_shader_fn), pick_global_wg_fn_(pick_global_wg_fn), + pick_local_wg_fn_(pick_local_wg_fn) { + shader_ = pick_shader_fn(&graph, args, resize_args); + global_workgroup_size_ = + pick_global_wg_fn(&graph, shader_, args, resize_args); + local_workgroup_size_ = utils::WorkgroupSize(pick_local_wg_fn( + &graph, shader_, global_workgroup_size_, args, resize_args)); +} + +DynamicDispatchNode::DynamicDispatchNode( + ComputeGraph& graph, + const vkapi::ShaderInfo& shader, + const PickGlobalFn& pick_global_wg_fn, + const PickLocalFn& pick_local_wg_fn, + const std::vector& args, + const vkapi::ParamsBindList& params, + const std::vector& push_constants, + const vkapi::SpecVarList& spec_vars, + const std::vector& resize_args, + const ResizeFunction& resize_fn) + : DispatchNode( + graph, + shader, + pick_global_wg_fn(&graph, shader, args, resize_args), + pick_local_wg_fn( + &graph, + shader, + pick_global_wg_fn(&graph, shader, args, resize_args), + args, + resize_args), + args, + params, + push_constants, + 
spec_vars, + resize_args, + resize_fn), + pick_shader_fn_{nullptr}, + pick_global_wg_fn_(pick_global_wg_fn), pick_local_wg_fn_(pick_local_wg_fn) {} void DynamicDispatchNode::encode(ComputeGraph* graph) { - shader_ = pick_shader_fn_(graph, args_, resize_args_); - global_workgroup_size_ = pick_global_wg_fn_(graph, args_, resize_args_); - local_workgroup_size_ = - utils::WorkgroupSize(pick_local_wg_fn_(graph, args_, resize_args_)); + if (pick_shader_fn_) { + shader_ = pick_shader_fn_(graph, args_, resize_args_); + } + if (pick_global_wg_fn_) { + global_workgroup_size_ = + pick_global_wg_fn_(graph, shader_, args_, resize_args_); + } + if (pick_local_wg_fn_) { + local_workgroup_size_ = utils::WorkgroupSize(pick_local_wg_fn_( + graph, shader_, global_workgroup_size_, args_, resize_args_)); + } DispatchNode::encode(graph); } diff --git a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h index ede50941415..005151272c3 100644 --- a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h @@ -32,10 +32,13 @@ class DynamicDispatchNode final : public DispatchNode { const std::vector&)>; using PickGlobalFn = const std::function&, const std::vector&)>; using PickLocalFn = const std::function&, const std::vector&)>; @@ -51,6 +54,18 @@ class DynamicDispatchNode final : public DispatchNode { const std::vector& resize_args, const ResizeFunction& resize_fn = nullptr); + explicit DynamicDispatchNode( + ComputeGraph& graph, + const vkapi::ShaderInfo& shader, + const PickGlobalFn& pick_global_wg_fn, + const PickLocalFn& pick_local_wg_fn, + const std::vector& args, + const vkapi::ParamsBindList& params, + const std::vector& push_constants, + const vkapi::SpecVarList& spec_vars, + const std::vector& resize_args, + const ResizeFunction& resize_fn = nullptr); + ~DynamicDispatchNode() override = default; void encode(ComputeGraph* graph) override; diff --git 
a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h index 7563fc63c71..0731722e13a 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h @@ -65,7 +65,7 @@ class ExecuteNode { (void)graph; } - inline void trigger_resize(ComputeGraph* graph) { + virtual inline void trigger_resize(ComputeGraph* graph) { if (resize_fn_ != nullptr) { resize_fn_(graph, args_, resize_args_); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml index 653bda9ccc0..25b3657c2eb 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml @@ -14,5 +14,6 @@ buffer_to_nchw: - VALUE: float - VALUE: int - VALUE: int8 + - VALUE: uint8 shader_variants: - NAME: buffer_to_nchw diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl index 3265a973980..0ee19206f59 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl @@ -47,11 +47,6 @@ layout(push_constant) uniform restrict Block { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -// For performance improvement, reduce register usage by caching positions in shared memory. -// Offset index by 1 every 16 points to avoid bank access conflict. -#define offset_pos_index(index) (index + ((index) >> 4)) -shared ivec3 pos_shared[offset_pos_index(LOCAL_WG_SIZE)]; - /* * Computes a depthwise convolution. Each shader invocation calculates the * output at a single output location. @@ -77,8 +72,6 @@ void main() { return; } - pos_shared[offset_pos_index(gl_LocalInvocationIndex)] = pos; - // Compute the index of the top-left element of the overlay region. 
Negative // indices indicate that the top-left element is in a region added by padding. const ivec2 ipos = pos.xy * stride - padding; @@ -89,13 +82,10 @@ void main() { const ivec2 end = ipos + overlay_region.xy; // sum outputs - VEC4_T sum[BATCH_SIZE_Y][BATCH_SIZE_X]; + VEC4_T sum[BATCH_SIZE_Y * BATCH_SIZE_X]; - sum[0][0] = texelFetch(t_bias, ivec2(pos.z, 0), 0); - for (int y = 0; y < BATCH_SIZE_Y; y++) { - for (int x = 0; x < BATCH_SIZE_X; x++) { - sum[y][x] = sum[0][0]; - } + for (int i = 0; i < BATCH_SIZE_Y * BATCH_SIZE_X; i++) { + sum[i] = VEC4_T(0); } // array to store input texels @@ -115,7 +105,7 @@ void main() { if (i > 0) { for (int j = 0; j < TILE_SIZE; j++) { for (int s = 0; s < BATCH_SIZE_X; s++) { - sum[1][s] = fma(in_texels[j + s], prev_kernel_line[j], sum[1][s]); + sum[BATCH_SIZE_X + s] = fma(in_texels[j + s], prev_kernel_line[j], sum[BATCH_SIZE_X + s]); } } } @@ -125,19 +115,19 @@ void main() { for (int j = 0; j < TILE_SIZE; j++, kx++) { prev_kernel_line[j] = texelFetch(t_kernel, ivec2(kx, pos.z), 0); for (int s = 0; s < BATCH_SIZE_X; s++) { - sum[0][s] = fma(in_texels[j + s], prev_kernel_line[j], sum[0][s]); + sum[s] = fma(in_texels[j + s], prev_kernel_line[j], sum[s]); } } } } - const ivec3 out_pos = pos_shared[offset_pos_index(gl_LocalInvocationIndex)]; + const VEC4_T bias = texelFetch(t_bias, ivec2(pos.z, 0), 0); for (int y = 0; y < BATCH_SIZE_Y; y++) { for (int x = 0; x < BATCH_SIZE_X; x++) { - if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits.xyz))) { - continue; + const ivec3 out_pos = ivec3(pos.x + x, pos.y + y, pos.z); + if (all(lessThan(out_pos.xy, out_limits.xy))) { + imageStore(t_out, out_pos, op(sum[y * BATCH_SIZE_X + x] + bias, out_min, out_max)); } - imageStore(t_out, ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), op(sum[y][x], out_min, out_max)); } } } diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl index 
c218b8ac8cc..cf9714ca468 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl @@ -46,7 +46,9 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * size is only 1x1, making it easier to re-use loaded texels from t_kernel. */ void main() { - const int out_limits_scaled[2] = {out_limits.x + (TILE_SIZE_X - 1) * TILE_SIZE_X, out_limits.y + (TILE_SIZE_Y - 1) * TILE_SIZE_Y}; + const int out_limits_scaled[2] = + {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X, + (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y}; const int div_by_x = int(gl_GlobalInvocationID.x / out_limits_scaled[0]); const int out_pos[3] = {int(gl_GlobalInvocationID.x % out_limits_scaled[0]), div_by_x, int(gl_GlobalInvocationID.y)}; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl index 8ed35d84d0e..a46f1e3b99c 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl @@ -48,14 +48,17 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * size is only 1x1, making it easier to re-use loaded texels from t_kernel. 
*/ void main() { - const int out_limits_scaled[2] = {out_limits.x + (TILE_SIZE_X - 1) * TILE_SIZE_X, out_limits.y + (TILE_SIZE_Y - 1) * TILE_SIZE_Y}; + const int out_limits_scaled[2] = + {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X, + (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y}; - const int div_by_x = int(gl_GlobalInvocationID.x / out_limits_scaled[0]); - const int out_pos[3] = {int(gl_GlobalInvocationID.x % out_limits_scaled[0]), div_by_x, int(gl_GlobalInvocationID.y)}; + const uint16_t div_by_x = uint16_t(gl_GlobalInvocationID.x / out_limits_scaled[0]); + const uint16_t out_pos_xy[2] = {uint16_t(gl_GlobalInvocationID.x % out_limits_scaled[0]), div_by_x}; + const int out_pos_z = int(gl_GlobalInvocationID.y); // If the top left position is out of bounds, then this invocation will have // no work to do. - if (out_pos[1] >= out_limits_scaled[1] || out_pos[2] >= out_limits.z) { + if (out_pos_xy[1] >= out_limits_scaled[1] || out_pos_z >= out_limits.z) { return; } @@ -68,8 +71,8 @@ void main() { uint16_t pos[TILE_SIZE_X * TILE_SIZE_Y * 2]; for (uint16_t y = uint16_t(0), i = uint16_t(0); y < TILE_SIZE_Y; ++y) { for (uint16_t x = uint16_t(0); x < TILE_SIZE_X; ++x) { - pos[i * 2] = uint16_t(out_pos[0]) * TILE_SIZE_X + x; - pos[i * 2 + 1] = uint16_t(out_pos[1]) * TILE_SIZE_Y + y; + pos[i * 2] = out_pos_xy[0] * TILE_SIZE_X + x; + pos[i * 2 + 1] = out_pos_xy[1] * TILE_SIZE_Y + y; i++; } } @@ -78,14 +81,9 @@ void main() { // Tuple of consecutive 4 elements represents a single output texel. 
float sum[TILE_SIZE_X * TILE_SIZE_Y * 4]; - const vec4 bias = texelFetch(t_bias, ivec2(out_pos[2], 0), 0); - // Initialize the output array with the bias value - for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y * 4; i += 4) { - sum[i] = bias.x; - sum[i + 1] = bias.y; - sum[i + 2] = bias.z; - sum[i + 3] = bias.w; + for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y * 4; i++) { + sum[i] = 0; } int z4 = 0; @@ -98,7 +96,7 @@ void main() { // Load kernel values from texels to array [[unroll]] for (int i = 0; i < 4; ++i) { - const vec4 k_tex = texelFetch(t_kernel, ivec2(z + i, out_pos[2]), 0); + const vec4 k_tex = texelFetch(t_kernel, ivec2(z + i, out_pos_z), 0); kernel_values[i * 4 + 0] = k_tex.x; kernel_values[i * 4 + 1] = k_tex.y; kernel_values[i * 4 + 2] = k_tex.z; @@ -156,10 +154,13 @@ void main() { } } + const vec4 bias = texelFetch(t_bias, ivec2(out_pos_z, 0), 0); + for (int i = 0; i < TILE_SIZE_X * TILE_SIZE_Y; ++i) { - const ivec3 pos_l = ivec3(pos[i * 2], pos[i * 2 + 1], out_pos[2]); - if (all(lessThan(pos_l, out_limits.xyz))) { - imageStore(t_out, pos_l, op(vec4(sum[i * 4], sum[i * 4 + 1], sum[i * 4 + 2], sum[i * 4 + 3]), out_min, out_max)); + const ivec3 pos_l = ivec3(pos[i * 2], pos[i * 2 + 1], out_pos_z); + if (all(lessThan(pos_l.xy, out_limits.xy))) { + const vec4 out_sum = vec4(sum[i * 4], sum[i * 4 + 1], sum[i * 4 + 2], sum[i * 4 + 3]); + imageStore(t_out, pos_l, op(out_sum + bias, out_min, out_max)); } } } diff --git a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml index 8fc9340d9d0..c1045d93afc 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml @@ -15,6 +15,7 @@ image_to_nchw: - VALUE: float - VALUE: int - VALUE: int8 + - VALUE: uint8 shader_variants: - NAME: image_to_nchw_texture3d - NAME: image_to_nchw_texture2d diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl 
b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl index 327c3868847..4b18abbb1c5 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl @@ -22,7 +22,13 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_buffer(B, "r", "nchw_in", "int")} -${layout_declare_ubo(B, "ivec4", "sizes")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 sizes; + }; +$else: + ${layout_declare_ubo(B, "ivec4", "sizes")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml index 506a66c0d27..0b8bbecb7bd 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml @@ -8,6 +8,7 @@ nchw_to_bitw8_image_nobitw8buffer: parameter_names_with_default_values: STORAGE: texture3d DTYPE: int8 + USE_PUSH_CONST: True generate_variant_forall: STORAGE: - VALUE: texture2d @@ -17,3 +18,5 @@ nchw_to_bitw8_image_nobitw8buffer: - VALUE: uint8 shader_variants: - NAME: nchw_to_bitw8_image_nobitw8buffer + - NAME: nchw_to_bitw8_image_nobitw8buffer_no_pc + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl index 32235a9ad65..ba4e4dd9dd9 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl @@ -12,9 +12,17 @@ layout(std430) buffer; ${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_tensor(1, "r", "nchw_in", DTYPE, STORAGE)} -${layout_declare_ubo(2, "ivec4", "out_sizes")} 
-${layout_declare_ubo(3, "ivec4", "out_strides")} -${layout_declare_ubo(4, "int", "numel")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 out_sizes; + ivec4 out_strides; + int numel; + }; +$else: + ${layout_declare_ubo(2, "ivec4", "out_sizes")} + ${layout_declare_ubo(3, "ivec4", "out_strides")} + ${layout_declare_ubo(4, "int", "numel")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml index 6292ef93337..486d710cf55 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml @@ -8,11 +8,15 @@ nchw_to_buffer: parameter_names_with_default_values: DTYPE: float STORAGE: buffer + USE_PUSH_CONST: True generate_variant_forall: DTYPE: - VALUE: half - VALUE: float - VALUE: int - VALUE: int8 + - VALUE: uint8 shader_variants: - NAME: nchw_to_buffer + - NAME: nchw_to_buffer_no_pc + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl index 2f55535c82c..4674822ce6a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl @@ -21,9 +21,17 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_buffer(B, "r", "buf_in", DTYPE)} -${layout_declare_ubo(B, "ivec4", "sizes")} -$if not FROM_STAGING: - ${layout_declare_ubo(B, "ivec4", "buf_strides")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 sizes; + $if not FROM_STAGING: + ivec4 buf_strides; + }; +$else: + ${layout_declare_ubo(B, "ivec4", "sizes")} + $if not FROM_STAGING: + ${layout_declare_ubo(B, "ivec4", "buf_strides")} #include "indexing_utils.h" diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml 
b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml index f44e1f74bfe..7e52ec10376 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml @@ -9,15 +9,25 @@ nchw_to_image: STORAGE: texture3d DTYPE: float FROM_STAGING: True + USE_PUSH_CONST: True generate_variant_forall: DTYPE: - VALUE: half - VALUE: float - VALUE: int - VALUE: int8 + - VALUE: uint8 shader_variants: - NAME: nchw_to_image_texture3d - NAME: nchw_to_image_texture2d STORAGE: texture2d - NAME: clone_buffer_to_image FROM_STAGING: False + - NAME: nchw_to_image_no_pc_texture3d + USE_PUSH_CONST: False + - NAME: nchw_to_image_no_pc_texture2d + STORAGE: texture2d + USE_PUSH_CONST: False + - NAME: clone_buffer_to_image_no_pc + FROM_STAGING: False + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/glsl/select.glslh b/backends/vulkan/runtime/graph/ops/glsl/select.glslh new file mode 100644 index 00000000000..3bcbf04a3ba --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/select.glslh @@ -0,0 +1,74 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#ifndef SELECT_GLSLH +#define SELECT_GLSLH + +/* + * Enable the fast path if a texel loaded from the input texture can be used as + * is to store to the output texture. The following conditions must be met: + * + * 1. The input and output textures have the same packed dimension. + * 2. The selected_dim must not be the packed dimension of the input. + * 3. The packed dimension of the input must "map" to the packed dimension of + * the output. This occurs if selected_dim is greater than the packed dimension + * of the input. 
+ */ +bool can_use_fast_path() { + if (out_packed_dim != in_packed_dim) { + return false; + } + if (selected_dim <= in_packed_dim) { + return false; + } + return true; +} + +/* + * Given an output tensor index, return the corresponding input tensor index for + * the select operator. This is done by "inserting" the select index at the + * selected_dim in the input tensor index. + * + * A simple example is (note all tensor index are in WHCN order): + * out_tidx = [7, 5, 9] + * selected_dim = 2 + * index = 3 + * in_tidx = [7, 3, 5, 9] + * + * This function assumes that the following variables are defined in the layout: + * - in_sizes + * - selected_dim + * - index + */ +ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) { + ivec4 in_tidx = ivec4(0); + + int adjusted_index = index; + if (index < 0) { + adjusted_index = index + in_sizes[selected_dim]; + } + + // Handle different dimensions for selection + if (selected_dim == 0) { + // Select from width dimension + in_tidx = ivec4(adjusted_index, out_tidx.x, out_tidx.y, out_tidx.z); + } else if (selected_dim == 1) { + // Select from height dimension + in_tidx = ivec4(out_tidx.x, adjusted_index, out_tidx.y, out_tidx.z); + } else if (selected_dim == 2) { + // Select from channel dimension + in_tidx = ivec4(out_tidx.x, out_tidx.y, adjusted_index, out_tidx.z); + } else if (selected_dim == 3) { + // Select from batch dimension + in_tidx = ivec4(out_tidx.x, out_tidx.y, out_tidx.z, adjusted_index); + } + + return in_tidx; +} + +#endif // SELECT_GLSLH diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl deleted file mode 100644 index f94e1120492..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { - // data.x: index along batch dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const uint src_pos_z = (num_texel_per_batch * index) + pos.z; - imageStore( - image_out, pos, texelFetch(image_in, ivec3(pos.x, pos.y, src_pos_z), 0)); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl deleted file mode 100644 index 0bbec798484..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} -#define T ${texel_component_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - int index; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int tex = index / 4; - const int ind = index % 4; - const T v = VEC4_T(texelFetch(image_in, ivec3(pos.x, pos.y, tex), 0))[ind]; - - imageStore(image_out, ivec3(pos.x, pos.y, 0), VEC4_T(v, 0, 0, 0)); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl deleted file mode 100644 index 517362f76ea..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#version 450 core - -#define PRECISION ${PRECISION} -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { - // data.x: index along channel dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - // read in the same channel from 4 separate batches - VEC4_T out_texel = VEC4_T(0, 0, 0, 0); - for (int k = 0; k < 4; k++) { - if ((k + pos.z * 4) >= - num_batches) { - break; - } - const uint src_pos_z = (4 * num_texel_per_batch * pos.z) + - (k * num_texel_per_batch) + (index / 4); - const uint src_pos_t = index % 4; - out_texel[k] = - VEC4_T(texelFetch(image_in, ivec3(pos.x, pos.y, src_pos_z), 0))[src_pos_t]; - } - - imageStore(image_out, pos, out_texel); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl deleted file mode 100644 index 87409fb35fd..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - int index; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - // w - const int src_x = pos.x; - // h - const int src_y = index; - // c - const int src_z = pos.y; - - const VEC4_T v = VEC4_T(texelFetch(image_in, ivec3(src_x, src_y, src_z), 0)); - - for (int i = 0; i < 4; i++) { - ivec3 new_pos = ivec3(pos.x, pos.y * 4 + i, 0); - - // When the C-channel exceeds original block size, exit early - if (new_pos.y >= sizes.y) { - return; - } - - imageStore(image_out, new_pos, VEC4_T(v[i], 0, 0, 0)); - } -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.yaml deleted file mode 100644 index a373f1decd9..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_height_3d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_height_3d diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl 
b/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl deleted file mode 100644 index 2e4e2afb2db..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - // data.x: index along height dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - VEC4_T out_texel = VEC4_T(0, 0, 0, 0); - // read in the same channel from 4 separate batches - for (int k = 0; k < 4; k++) { - if ((k + pos.z * 4) >= num_batches - ) { // < 4 batches for this texel, exit early - break; - } - const uint src_pos_z = (pos.z * num_texel_per_batch * 4) + - k * num_texel_per_batch + (pos.y / 4); - out_texel[k] = VEC4_T(texelFetch( - image_in, ivec3(pos.x, index, src_pos_z), 0))[pos.y % 4]; - } - 
imageStore(image_out, pos, out_texel); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.yaml deleted file mode 100644 index c3724f1157a..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_height_4d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_height_4d diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl deleted file mode 100644 index 1e12d15ab21..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - int index; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - // w - const int src_x = index; - // h - const int src_y = pos.x; - // c - const int src_z = pos.y; - - const VEC4_T v = VEC4_T(texelFetch(image_in, ivec3(src_x, src_y, src_z), 0)); - - for (int i = 0; i < 4; i++) { - ivec3 new_pos = ivec3(pos.x, pos.y * 4 + i, 0); - - // When the C-channel exceeds original block size, exit early - if (new_pos.y >= sizes.y) { - return; - } - - imageStore(image_out, new_pos, VEC4_T(v[i], 0, 0, 0)); - } -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.yaml deleted file mode 100644 index a3070bf6ca3..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_width_3d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_width_3d diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl deleted file mode 100644 index ffbd8afbda0..00000000000 --- 
a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { - // data.x: index along width dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - //vec4 out_texel = vec4(0, 0, 0, 0); - VEC4_T out_texel = VEC4_T(0, 0, 0, 0); - // read in the same channel from 4 separate batches - for (int k = 0; k < 4; k++) { - if ((k + pos.z * 4) >= - num_batches) { // < 4 batches for this texel, exit early - break; - } - const uint src_pos_z = (pos.z * num_texel_per_batch * 4) + - k * num_texel_per_batch + (pos.y / 4); - - out_texel[k] = VEC4_T(texelFetch( - image_in, ivec3(index, pos.x, src_pos_z), 0))[pos.y % 4]; - } - imageStore(image_out, pos, out_texel); -} diff --git 
a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.yaml deleted file mode 100644 index f1131d77395..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_width_4d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_width_4d diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice.glslh b/backends/vulkan/runtime/graph/ops/glsl/slice.glslh new file mode 100644 index 00000000000..5d4cc70fdc1 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/slice.glslh @@ -0,0 +1,53 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#ifndef SLICE_GLSLH +#define SLICE_GLSLH + +/** + * Enable the fast path if a texel loaded from the input texture can be used as + * is to store to the output texture. The following conditions must be met: + * + * 1. The input and output textures have the same packed dimension. + * 2. The select_dim must not be the packed dimension of the input. + */ +bool can_use_fast_path() { + if (out_packed_dim != in_packed_dim) { + return false; + } + if (in_packed_dim == selected_dim) { + return false; + } + return true; +} + +/* + * Converts output tensor indices to input tensor indices for the slice operation. + * This function maps the output indices to the corresponding input indices based on + * the slice parameters (start, step, selected_dim). 
+ * + * Parameters assumed to be defined in the layout specifier: + * - in_sizes + * - selected_dim + * - start + * - step + */ +ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) { + ivec4 in_tidx = out_tidx; + + int adjusted_start = start; + if (start < 0) { + adjusted_start = start + in_sizes[selected_dim]; + } + + in_tidx[selected_dim] = adjusted_start + out_tidx[selected_dim] * step; + + return in_tidx; +} + +#endif // SLICE_GLSLH diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl deleted file mode 100644 index 0a6fa31a65f..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)} -${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)} -${layout_declare_ubo(2, "ivec4", "out_sizes")} -${layout_declare_ubo(3, "ivec4", "in_sizes")} - -layout(set = 0, binding = 4) uniform PRECISION restrict SliceArg { - int offset; - int step; -} -slice_arg; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -layout(constant_id = 3) const int packed_dim = C_DIM; - -void main() { - const ivec3 out_pos = ivec3(gl_GlobalInvocationID); - const ivec4 idx = to_tensor_idx(out_pos, out_sizes, packed_dim); - - if (any(greaterThanEqual(idx, out_sizes))) { - return; - } - - // We map the output pos using the buffer index. For each index in the texel, - // we calculate the source whcn-coordinate amended with offset-ed channel - // value. 
Then we calculate the actual texture position from the - // whcn-coordinate. - const ivec4 buf_indices = tidx_to_nchwi(idx, out_sizes, packed_dim); - - vec4 outex; - for (int i=0;i<4;i++) { - ivec4 user_coor = nchwi_to_tidx(buf_indices[i], out_sizes); - - int in_dim = user_coor[packed_dim]; - - ivec4 in_user_coor = user_coor; - in_user_coor[packed_dim] = slice_arg.offset + in_dim * slice_arg.step; - - ivec4 in_pow_elem = to_texture_elem_pos( - in_user_coor, - in_sizes, - packed_dim); - - vec4 v = texelFetch(t_in, in_pow_elem.xyz, 0); - - outex[i] = v[in_pow_elem.w]; - } - imageStore(t_out, out_pos, outex); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml b/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml deleted file mode 100644 index 718e7316824..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml +++ /dev/null @@ -1,11 +0,0 @@ -slice_packed_dim: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - STORAGE: texture3d - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: slice_packed_dim diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl deleted file mode 100644 index 54f0bd0b78c..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict SliceArg { - int dim; - int offset; - int step; - int image_in_channel_size; -} -slice_arg; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -layout(constant_id = 3) const int packed_dim = C_DIM; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (pos_out_of_bounds(pos, sizes, packed_dim)) { - return; - } - - ivec3 in_pos = pos; - - // slice along batch axis - if (slice_arg.dim == 3) { - // index of the channel inside a batch - const int chanl_index = pos.z % slice_arg.image_in_channel_size; - // index of batch - const int batch_index = pos.z / slice_arg.image_in_channel_size; - in_pos.z = (slice_arg.offset + batch_index * slice_arg.step) * slice_arg.image_in_channel_size + chanl_index; - } else if (slice_arg.dim == C_DIM) { - // index of the channel inside a batch - const int chanl_index = pos.z % sizes.z; - // index of batch - const int batch_index = pos.z / sizes.z; - in_pos.z = slice_arg.offset + batch_index * slice_arg.image_in_channel_size + chanl_index * slice_arg.step; - } else if (slice_arg.dim == H_DIM) { - in_pos.y = slice_arg.offset + pos.y * slice_arg.step; - } else { - in_pos.x = slice_arg.offset + pos.x * slice_arg.step; - } - - imageStore(image_out, pos, texelFetch(image_in, in_pos, 0)); - -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml b/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml deleted file mode 100644 index 0453bb707b1..00000000000 --- 
a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml +++ /dev/null @@ -1,10 +0,0 @@ -slice_unpacked_dim: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: slice_unpacked_dim diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl new file mode 100644 index 00000000000..3ca854e0526 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl @@ -0,0 +1,58 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#version 450 core + +#define PRECISION ${PRECISION} + +#define VEC4_T ${texel_type(DTYPE)} +#define T ${buffer_scalar_type(DTYPE)} + +${define_active_storage_type("buffer")} +${define_required_extensions(DTYPE)} + +layout(std430) buffer; + +#include "indexing_utils.h" +${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")} +${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")} + +$if OP_NAME == "slice": + ${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} + +$if OP_NAME == "select": + ${layout_declare_ubo(B, "int", "index")} + +layout(push_constant) uniform restrict Block { + ivec4 in_sizes; + ivec4 out_strides; + ivec4 in_strides; + int out_numel; + int selected_dim; +}; + +${layout_declare_spec_const(C, "int", "out_packed_dim", "DEFAULT_LAYOUT")} +${layout_declare_spec_const(C, "int", "in_packed_dim", "DEFAULT_LAYOUT")} + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +#include "${OP_NAME}.glslh" + +void main() { + const int out_bufi = ivec3(gl_GlobalInvocationID).x; + if (out_bufi >= out_numel) { + return; + } + + const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim); + ivec4 
in_tidx = out_tidx_to_in_tidx(out_tidx); + + const int in_bufi = tidx_to_bufi(in_tidx, in_strides); + t_out[out_bufi] = t_in[in_bufi]; +} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml similarity index 54% rename from backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.yaml rename to backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml index 1c5c4e34b06..bdde613c8ce 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml @@ -1,10 +1,13 @@ -select_channel_3d: +transfer_buffer: parameter_names_with_default_values: DTYPE: float - NDIM: 3 + OP_NAME: select generate_variant_forall: DTYPE: - VALUE: half - VALUE: float shader_variants: - - NAME: select_channel_3d + - NAME: select_buffer + OP_NAME: select + - NAME: slice_buffer + OP_NAME: slice diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl new file mode 100644 index 00000000000..d3e25436c04 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl @@ -0,0 +1,83 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#version 450 core + +#define PRECISION ${PRECISION} + +#define VEC4_T ${texel_type(DTYPE)} +#define T ${buffer_scalar_type(DTYPE)} + +${define_active_storage_type("texture3d")} +${define_required_extensions(DTYPE)} + +layout(std430) buffer; + +#include "indexing_utils.h" + +${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")} +${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")} + +$if OP_NAME == "slice": + ${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} + +$if OP_NAME == "select": + ${layout_declare_ubo(B, "int", "index")} + +layout(push_constant) uniform restrict Block { + ivec4 out_sizes; + ivec4 in_sizes; + int selected_dim; +}; + +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int out_packed_dim = unhash_packed_dim(out_layout); + +${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); +const lowp int in_packed_dim = unhash_packed_dim(in_layout); + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +#include "${OP_NAME}.glslh" + +void main() { + const ivec3 lpos = ivec3(gl_GlobalInvocationID); + ivec4 out_tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, out_packed_dim); + + if (any(greaterThanEqual(out_tidx, out_sizes))) { + return; + } + + if (can_use_fast_path()) { + ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx); + ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim); + VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos)); + + write_texel_lpos(t_out, lpos, in_texel, out_axis_map); + } + else { + VEC4_T out_texel = VEC4_T(0); + for (int texel_i = 0; texel_i < 4; ++texel_i) { + ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx); + ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim); + int element_idx = in_tidx[in_packed_dim] % 4; + + VEC4_T in_texel = 
VEC4_T(load_texel(t_in, in_pos)); + T selected_value = T(in_texel[element_idx]); + + out_texel[texel_i] = selected_value; + + out_tidx[out_packed_dim]++; + } + + write_texel_lpos(t_out, lpos, out_texel, out_axis_map); + } +} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml similarity index 52% rename from backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.yaml rename to backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml index 6236555f5dd..f877ee036e4 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml @@ -1,10 +1,13 @@ -select_channel_4d: +transfer_texture: parameter_names_with_default_values: DTYPE: float - NDIM: 3 + OP_NAME: select generate_variant_forall: DTYPE: - VALUE: half - VALUE: float shader_variants: - - NAME: select_channel_4d + - NAME: select_texture3d + OP_NAME: select + - NAME: slice_texture3d + OP_NAME: slice diff --git a/backends/vulkan/runtime/graph/ops/glsl/where.glsl b/backends/vulkan/runtime/graph/ops/glsl/where.glsl new file mode 100644 index 00000000000..5df813d1241 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/where.glsl @@ -0,0 +1,111 @@ +// where.glsl + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + + +#version 450 core + +#define PRECISION ${PRECISION} + +#define VEC4_T ${texel_load_type(DTYPE, STORAGE)} +#define T ${buffer_scalar_type(DTYPE)} +#define COND_T ${buffer_scalar_type("bool")} + +${define_active_storage_type(STORAGE)} +${define_required_extensions(DTYPE)} +${define_required_extensions("bool")} + +layout(std430) buffer; + +${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "t_condition", "bool", STORAGE)} +${layout_declare_tensor(B, "r", "t_self", DTYPE, STORAGE)} +${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)} + + +#include "indexing_utils.h" + +$if STORAGE == "buffer": + ${layout_declare_ubo(B, "int", "out_numl")} + ${layout_declare_ubo(B, "ivec4", "out_strides")} + ${layout_declare_ubo(B, "ivec4", "cond_strides")} + ${layout_declare_ubo(B, "ivec4", "self_strides")} + ${layout_declare_ubo(B, "ivec4", "other_strides")} + + ${layout_declare_spec_const(C, "int", "out_packed_dim", "DEFAULT_LAYOUT")} + ${layout_declare_spec_const(C, "int", "cond_packed_dim", "DEFAULT_LAYOUT")} + ${layout_declare_spec_const(C, "int", "self_packed_dim", "DEFAULT_LAYOUT")} + ${layout_declare_spec_const(C, "int", "other_packed_dim", "DEFAULT_LAYOUT")} +$else: + ${layout_declare_ubo(B, "ivec3", "out_limits")} + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +#ifdef USING_BUFFER + +void main() { + int out_bufi = int(gl_GlobalInvocationID.x); + // ivec4 tidx = ivec4(gl_GlobalInvocationID, 0); + // int out_bufi = tidx_to_bufi(tidx, out_strides); + // int cond_bufi = tidx_to_bufi(tidx, cond_strides); + // int self_bufi = tidx_to_bufi(tidx, self_strides); + // int other_bufi = tidx_to_bufi(tidx, other_strides); + if (out_bufi >= out_numl) { + return; + } + + const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim); + out_bufi = tidx_to_bufi(out_tidx, out_strides); + + const ivec4 cond_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim); + const int cond_bufi = 
tidx_to_bufi(cond_tidx, cond_strides); + + const ivec4 self_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim); + const int self_bufi = tidx_to_bufi(self_tidx, self_strides); + + const ivec4 other_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim); + const int other_bufi = tidx_to_bufi(other_tidx, other_strides); + + COND_T cond = t_condition[cond_bufi] ; + T v_self = t_self[self_bufi]; + T v_other = t_other[other_bufi]; + + if (cond > 0) { + t_out[out_bufi] = v_self; + } else { + t_out[out_bufi] = v_other; + } +} + +#else // !USING_BUFFER + +void main() { + const ivec3 pos = ivec3(gl_GlobalInvocationID); + + + if (any(greaterThanEqual(pos, out_limits))) { + return; + } + + vec4 cond = load_texel(t_condition, pos); + VEC4_T selftex = load_texel(t_self, pos); + VEC4_T othertex = load_texel(t_other, pos); + + VEC4_T outtex; + + for (int idx = 0; idx < 4; ++idx) { + if (cond[idx] == 1) { + outtex[idx] = selftex[idx]; + } else { + outtex[idx] = othertex[idx]; + } + } + write_texel(t_out, pos, outtex); +} + #endif // !USING_BUFFER diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/where.yaml similarity index 64% rename from backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.yaml rename to backends/vulkan/runtime/graph/ops/glsl/where.yaml index 9c7d54c8f69..edbd843a336 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/where.yaml @@ -1,10 +1,12 @@ -select_batch_4d: +where: parameter_names_with_default_values: DTYPE: float - NDIM: 3 generate_variant_forall: + STORAGE: + - VALUE: texture3d + - VALUE: buffer DTYPE: - VALUE: half - VALUE: float shader_variants: - - NAME: select_batch_4d + - NAME: where diff --git a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp index b547bc3572d..d0276b1783b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp +++ 
b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp @@ -105,9 +105,9 @@ void add_buffer_to_image_node( // Input and Outputs {{image, vkapi::kWrite}, {buffer, vkapi::kRead}}, // Parameter Buffers - {graph.sizes_ubo(image), graph.strides_ubo(buffer)}, - // Push Constants {}, + // Push Constants + {graph.sizes_pc_of(image), graph.strides_pc_of(buffer)}, // Specialization Constants {graph.hashed_layout_of(image)}, // Resize Args diff --git a/backends/vulkan/runtime/graph/ops/impl/Common.cpp b/backends/vulkan/runtime/graph/ops/impl/Common.cpp new file mode 100644 index 00000000000..4de099231d3 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Common.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace vkcompute { + +utils::uvec3 default_pick_global_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const std::vector& args, + const std::vector& additional_args) { + (void)shader; + const ValueRef out = args.at(0).refs.at(0); + return graph->create_global_wg_size(out); +} + +utils::uvec3 default_pick_local_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const utils::uvec3& global_workgroup_size, + const std::vector& args, + const std::vector& additional_args) { + (void)shader; + return graph->create_local_wg_size(global_workgroup_size); +} + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Common.h b/backends/vulkan/runtime/graph/ops/impl/Common.h new file mode 100644 index 00000000000..d5ff455ae41 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Common.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace vkcompute { + +/** + * Creates a global workgroup size based on the first output tensor in the args. + * This is a utility function that extracts the output tensor from + * args.at(0).refs.at(0) and calls graph->create_global_wg_size(out) on it. + * + * @param graph The ComputeGraph instance + * @param args Vector of ArgGroup containing the output tensor reference + * @return utils::uvec3 The global workgroup size + */ +utils::uvec3 default_pick_global_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const std::vector& args, + const std::vector& additional_args); + +/** + * Creates a local workgroup size based on the first output tensor in the args. + * This is a utility function that extracts the output tensor from + * args.at(0).refs.at(0) and calls graph->create_local_wg_size(out) on it. 
+ * + * @param graph The ComputeGraph instance + * @param args Vector of ArgGroup containing the output tensor reference + * @return utils::uvec3 The local workgroup size + */ +utils::uvec3 default_pick_local_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const utils::uvec3& global_workgroup_size, + const std::vector& args, + const std::vector& additional_args); + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index fbe4a61befc..32f478fa5bd 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -106,9 +106,10 @@ ValueRef prepack_biases( graph.create_local_wg_size(v), vref, v, - {t->sizes_ubo()}, + {}, // Specialization constants - {t->hashed_layout()})); + {t->hashed_layout()}, + {graph.sizes_pc_of(v)})); return v; } diff --git a/backends/vulkan/runtime/graph/ops/impl/Select.cpp b/backends/vulkan/runtime/graph/ops/impl/Select.cpp index a83e986e414..69d49e8283b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Select.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Select.cpp @@ -8,129 +8,136 @@ #include -#include -#include - -#include +#include #include #include namespace vkcompute { -void check_args( - const api::vTensor& t_in, - int64_t dim, - int64_t index, - const api::vTensor& t_out) { - VK_CHECK_COND(check_packed_dim_is(t_in, WHCN::kChannelsDim)); - VK_CHECK_COND(check_packed_dim_is(t_out, WHCN::kChannelsDim)); +void resize_select_node( + ComputeGraph* graph, + const std::vector& args, + const std::vector& extra_args) { + ValueRef out = args.at(0).refs.at(0); + ValueRef in = args.at(1).refs.at(0); + int64_t dim = graph->extract_scalar(extra_args.at(0)); - const int64_t in_dim = t_in.dim(); - VK_CHECK_COND( - in_dim == 3 || in_dim == 4, - "Vulkan select only support 3d or 4d tensors!"); - - const int64_t in_size = t_in.size(dim); - - if (index < -in_size || 
index >= in_size) { - VK_CHECK_COND( - false, - "select(): index ", - index, - " t_outof range for tensor of size ", - in_size, - " at dimension ", - dim); + int64_t in_ndim = graph->dim_of(in); + + if (dim < 0) { + dim += in_ndim; + } + + std::vector new_out_sizes; + for (int64_t i = 0; i < in_ndim; ++i) { + if (i != dim) { + new_out_sizes.push_back(graph->size_at(i, in)); + } } + + graph->virtual_resize(out, new_out_sizes); } -void add_select_int_node( +void check_select_args( ComputeGraph& graph, const ValueRef in, const ValueRef dim_ref, const ValueRef index_ref, const ValueRef out) { - vTensorPtr t_in = graph.get_tensor(in); - vTensorPtr t_out = graph.get_tensor(out); int64_t dim = graph.extract_scalar(dim_ref); - int64_t index = graph.extract_scalar(index_ref); + int64_t index = graph.extract_optional_scalar(index_ref, 0); + int64_t in_ndim = graph.dim_of(in); - check_args(*t_in, dim, index, *t_out); + if (dim < 0) { + dim += in_ndim; + } - const int64_t in_size = t_in->size(dim); + VK_CHECK_COND( + dim >= 0 && dim < in_ndim, + "Dimension out of range (expected to be in range of [", + -in_ndim, + ", ", + in_ndim - 1, + "], but got ", + dim, + ")"); + + const int64_t in_size_at_dim = graph.size_at(dim, in); if (index < 0) { - index += in_size; + index += in_size_at_dim; } - std::string kernel_name; - - // for 3d tensors, these values are not used by the shader. 
- int32_t num_texel_per_batch = 1; - int32_t num_batches = 1; - - int64_t in_dim = t_in->dim(); - if (in_dim == 3) { - if (dim == 0) { - kernel_name = "select_channel_3d"; - } else if (dim == 1) { - kernel_name = "select_height_3d"; - } else if (dim == 2) { - kernel_name = "select_width_3d"; - } else { - VK_CHECK_COND( - false, "Unexpected dim value=", dim, "for the input 3d tensor"); - } - } else { // self.dim() == 4 - num_texel_per_batch = - static_cast(std::ceil(static_cast(t_in->size(1)) / 4)); - num_batches = t_in->size(0); - if (dim == 0) { - kernel_name = "select_batch_4d"; - } else if (dim == 1) { - kernel_name = "select_channel_4d"; - } else if (dim == 2) { - kernel_name = "select_height_4d"; - } else if (dim == 3) { - kernel_name = "select_width_4d"; - } else { + VK_CHECK_COND( + index >= 0 && index < in_size_at_dim, + "select(): index ", + index, + " out of range for tensor of size ", + in_size_at_dim, + " at dimension ", + dim); + + // Check that output tensor has correct dimensions + int64_t out_dim = graph.dim_of(out); + VK_CHECK_COND( + out_dim == in_ndim - 1, + "Output tensor dimension mismatch (expected ", + in_size_at_dim - 1, + ", but got ", + out_dim, + ")"); + + // Check that output tensor has correct sizes + int64_t out_idx = 0; + for (int64_t i = 0; i < in_size_at_dim; ++i) { + if (i != dim) { VK_CHECK_COND( - false, "Unexpected dim value=", dim, "for the input 4d tensor"); + graph.size_at(out_idx, out) == graph.size_at(i, in), + "Output size mismatch at dimension ", + out_idx, + " (expected ", + graph.size_at(i, in), + ", but got ", + graph.size_at(out_idx, out), + ")"); + out_idx++; } } +} - kernel_name.reserve(kShaderNameReserve); - add_dtype_suffix(kernel_name, *t_out); +/** + * Adds a select operation node to the compute graph. + * + * The select operator extracts a slice from a tensor along a specified + * dimension at a given index. 
It effectively reduces the dimensionality of the + * input tensor by one, by selecting a single slice at the specified index along + * the given dimension. For example, if input is a 3D tensor with shape [2,3,4] + * and we select dimension 1, index 2, the output will be a 2D tensor with shape + * [2,4]. + */ +void add_select_copy_node( + ComputeGraph& graph, + const ValueRef in, + const ValueRef dim_ref, + const ValueRef index_ref, + const ValueRef out) { + check_select_args(graph, in, dim_ref, index_ref, out); - // TODO: add resizing to support dynamic shapes. - graph.execute_nodes().emplace_back(new DispatchNode( + add_transfer_copy_node( graph, - VK_KERNEL_FROM_STR(kernel_name), - graph.create_global_wg_size(out), - graph.create_local_wg_size(out), - // Inputs and Outputs - {{out, vkapi::MemoryAccessType::WRITE}, - {in, vkapi::MemoryAccessType::READ}}, - // Parameter buffers - {t_out->logical_limits_ubo(), - t_out->sizes_ubo(), - // TODO: num_batches and num_texel_per_batch are provided by - // t_out->sizes. Can change the following to reduce params - // created. 
- graph.create_params_buffer( - utils::make_ivec4({index, num_batches, num_texel_per_batch, 0}))}, - // Push Constants - {}, - // Specialization Constants - {}, - // Resize Args - {}, - // Resizing Logic - nullptr)); + TransferType::SELECT, + in, + dim_ref, + index_ref, + kDummyValueRef, + kDummyValueRef, + out, + {dim_ref, index_ref}, + resize_select_node); } void select_int(ComputeGraph& graph, const std::vector& args) { - return add_select_int_node(graph, args[0], args[1], args[2], args[3]); + return add_select_copy_node(graph, args[0], args[1], args[2], args[3]); } REGISTER_OPERATORS { diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp index c40e16f7c0a..67d714d10aa 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp @@ -8,12 +8,10 @@ #include -#include - #include +#include #include -#include #include #include @@ -33,127 +31,73 @@ inline int64_t normalize_idx( return normalize(index, max); } -void add_slice_tensor_copy_node( - ComputeGraph& graph, - ValueRef in, - ValueRef dim_ref, - ValueRef opt_start_ref, - ValueRef opt_end_ref, - ValueRef step_ref, - ValueRef out) { - vTensorPtr t_in = graph.get_tensor(in); - vTensorPtr t_out = graph.get_tensor(out); - - VK_CHECK_COND(check_same_packed_dim(*t_in, *t_out)); - - // Need normalize the dim - int64_t dim = graph.extract_scalar(dim_ref); - - VK_CHECK_COND( - -t_in->dim() <= dim && dim < t_in->dim(), - "dim must be in range of [-self.dim(), self.dim()), but current dim's value is ", - dim, - " and self.dim() = ", - t_in->dim()); - - dim = normalize(dim, t_in->dim()); - - DimIndex dim_index = normalize_to_dim_index(*t_in, dim); +void resize_slice_copy_node( + ComputeGraph* graph, + const std::vector& args, + const std::vector& extra_args) { + ValueRef out_ref = args.at(0).refs.at(0); + ValueRef in_ref = args.at(1).refs.at(0); + int64_t dim = graph->extract_scalar(extra_args.at(0)); 
std::optional opt_start = - graph.extract_optional_scalar(opt_start_ref); + graph->extract_optional_scalar(extra_args.at(1)); std::optional opt_end = - graph.extract_optional_scalar(opt_end_ref); - int64_t step = graph.extract_scalar(step_ref); - - const auto in_sizes = t_in->sizes(); - const auto out_sizes = t_out->sizes(); - - int64_t start = opt_start.value_or(0); - int64_t end = opt_end.value_or(in_sizes[dim]); + graph->extract_optional_scalar(extra_args.at(2)); + int64_t step = graph->extract_scalar(extra_args.at(3)); - start = normalize_idx(start, in_sizes[dim], 0); - end = normalize_idx(end, in_sizes[dim], in_sizes[dim]); + // Normalize dim + if (dim < 0) { + dim += graph->dim_of(in_ref); + } - const vkapi::SpecVarList spec_vars = {t_in->packed_dim()}; + const std::vector in_sizes = graph->sizes_of(in_ref); + int64_t dim_size = in_sizes.at(dim); - const auto packed_dim_idx = - static_cast(DimIndex::DIM_LAST - t_in->packed_dim()); + int64_t start = opt_start.value_or(0); + int64_t end = opt_end.value_or(dim_size); - // if slice dim is the same as the packed dim, we can use the channel slice - if (dim_index == packed_dim_idx) { - // slice by channel - std::string kernel_name = "slice_packed_dim"; - kernel_name.reserve(kShaderNameReserve); - add_dtype_suffix(kernel_name, *t_out); + // Normalize start and end indices + start = normalize_idx(start, dim_size, 0); + end = normalize_idx(end, dim_size, dim_size); - const struct Block final { - int offset; - int step; - } params{ - static_cast(start), - static_cast(step), - }; + // Calculate output size + std::vector new_out_sizes = in_sizes; + new_out_sizes.at(dim) = (end - start + step - 1) / step; // Ceiling division - graph.execute_nodes().emplace_back(new DispatchNode( - graph, - VK_KERNEL_FROM_STR(kernel_name), - graph.create_global_wg_size(out), - graph.create_local_wg_size(out), - {{out, vkapi::MemoryAccessType::WRITE}, - {in, vkapi::MemoryAccessType::READ}}, - {t_out->sizes_ubo(), - t_in->sizes_ubo(), - 
graph.create_params_buffer(params)}, - {}, - spec_vars, - {}, - nullptr)); - - } else { - // GPU's coordinate is in x = 0, y = 1, z = 2, w = 3 - const int64_t gpu_dim = -(dim_index + 1); - // stride of input tensor's channel dimension - int64_t in_channel_stride = dim_at(in_sizes, kChannel4D); - VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step)); - - // Due to channel packing, each batch value is span over stride planes - if (dim_index == kBatch4D && packed_dim_idx == kChannel4D) { - in_channel_stride = utils::div_up_4(in_channel_stride); - } + graph->virtual_resize(out_ref, new_out_sizes); +} - std::string kernel_name = "slice_unpacked_dim"; - kernel_name.reserve(kShaderNameReserve); - add_dtype_suffix(kernel_name, *t_out); - - utils::uvec3 global_size = t_out->logical_limits(); - utils::uvec3 local_size = graph.create_local_wg_size(global_size); - - const struct Block final { - int dim; - int offset; - int step; - int stride; - } params{ - static_cast(gpu_dim), - static_cast(start), - static_cast(step), - static_cast(in_channel_stride), - }; - - graph.execute_nodes().emplace_back(new DispatchNode( - graph, - VK_KERNEL_FROM_STR(kernel_name), - global_size, - local_size, - {{out, vkapi::MemoryAccessType::WRITE}, - {in, vkapi::MemoryAccessType::READ}}, - {t_out->sizes_ubo(), graph.create_params_buffer(params)}, - {}, - spec_vars, - {}, - nullptr)); - } +/** + * Adds a slice_copy operation node to the compute graph. + * + * The slice operator extracts a portion of a tensor along a specified + * dimension. It creates a new tensor that contains a subset of the input + * tensor's data, defined by start, end, and step parameters along the given + * dimension. + * + * For example, if input is a tensor with shape [4,5,6] and we slice along + * dimension 1 with start=1, end=4, step=2, the output will have shape [4,2,6], + * containing elements from the input at positions 1 and 3 along dimension 1. 
+ */ +void add_slice_copy_node( + ComputeGraph& graph, + ValueRef in, + ValueRef dim_ref, + ValueRef opt_start_ref, + ValueRef opt_end_ref, + ValueRef step_ref, + ValueRef out) { + add_transfer_copy_node( + graph, + TransferType::SLICE, + in, + dim_ref, + opt_start_ref, + opt_end_ref, + step_ref, + out, + {dim_ref, opt_start_ref, opt_end_ref, step_ref}, + resize_slice_copy_node); } std::vector get_slice_sizes( @@ -186,16 +130,16 @@ void resize_slice_view_node( const std::vector& args, const std::vector& extra_args) { (void)args; - vTensorPtr out = graph->get_tensor(extra_args[0]); + ValueRef out_ref = extra_args.at(0); std::vector new_out_sizes = get_slice_sizes( *graph, - extra_args[1], // input - extra_args[2], // dim - extra_args[3], // optional start - extra_args[4]); // optional end + extra_args.at(1), // input + extra_args.at(2), // dim + extra_args.at(3), // optional start + extra_args.at(4)); // optional end - out->virtual_resize(new_out_sizes); + graph->virtual_resize(out_ref, new_out_sizes); } void check_slice_view_args( @@ -267,54 +211,54 @@ void add_slice_view_node( std::vector new_out_sizes = get_slice_sizes(graph, in_ref, dim_ref, opt_start_ref, opt_end_ref); - graph.get_tensor(out_ref)->virtual_resize(new_out_sizes); + graph.virtual_resize(out_ref, new_out_sizes); graph.execute_nodes().emplace_back(new ExecuteNode( resize_slice_view_node, {out_ref, in_ref, dim_ref, opt_start_ref, opt_end_ref, opt_step_ref})); } -void slice_tensor_copy(ComputeGraph& graph, const std::vector& args) { - return add_slice_tensor_copy_node( +void slice_copy(ComputeGraph& graph, const std::vector& args) { + return add_slice_copy_node( graph, - args[0], - args[1], // dim - args[2], // optional start - args[3], // optional end - args[4], // step - args[5]); + args.at(0), + args.at(1), // dim + args.at(2), // optional start + args.at(3), // optional end + args.at(4), // step + args.at(5)); } -void slice_tensor(ComputeGraph& graph, const std::vector& args) { - ValueRef in = 
args[0]; - ValueRef out = args[5]; +void slice(ComputeGraph& graph, const std::vector& args) { + ValueRef in = args.at(0); + ValueRef out = args.at(5); // Special case if out is a view of in if (graph.val_is_view_of(out, in)) { add_slice_view_node( graph, in, - args[1], // dim - args[2], // optional start - args[3], // optional end - args[4], // step + args.at(1), // dim + args.at(2), // optional start + args.at(3), // optional end + args.at(4), // step out); return; } - add_slice_tensor_copy_node( + add_slice_copy_node( graph, in, - args[1], // dim - args[2], // optional start - args[3], // optional end - args[4], // step + args.at(1), // dim + args.at(2), // optional start + args.at(3), // optional end + args.at(4), // step out); } REGISTER_OPERATORS { - VK_REGISTER_OP(aten.slice_copy.Tensor, slice_tensor_copy); - VK_REGISTER_OP(aten.slice.Tensor, slice_tensor); + VK_REGISTER_OP(aten.slice_copy.Tensor, slice_copy); + VK_REGISTER_OP(aten.slice.Tensor, slice); } } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index f39b0fc33ff..8c060a9da4b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -28,14 +28,14 @@ void add_staging_to_tensor_node( vkapi::ShaderInfo shader = get_nchw_to_tensor_shader( *graph.get_tensor(out_tensor), graph.int8_buffers_enabled()); - vkapi::ParamsBindList ubos; + std::vector pcs; if (graph.is_buffer_storage(out_tensor)) { - ubos.append( - {graph.sizes_ubo(out_tensor), - graph.strides_ubo(out_tensor), - graph.numel_ubo(out_tensor)}); + pcs = { + graph.sizes_pc_of(out_tensor), + graph.strides_pc_of(out_tensor), + graph.numel_pc_of(out_tensor)}; } else { - ubos.append({graph.sizes_ubo(out_tensor)}); + pcs = {graph.sizes_pc_of(out_tensor)}; } graph.execute_nodes().emplace_back(new DispatchNode( @@ -46,9 +46,9 @@ void add_staging_to_tensor_node( // Input and Outputs {{out_tensor, 
vkapi::kWrite}, {in_staging, vkapi::kRead}}, // Parameter Buffers - ubos, - // Push Constants {}, + // Push Constants + pcs, // Specialization Constants {graph.hashed_layout_of(out_tensor)}, // Resize Args @@ -127,14 +127,14 @@ void add_prepack_standard_node( vkapi::ShaderInfo shader = get_nchw_to_tensor_shader( *graph.get_tensor(tensor), graph.int8_buffers_enabled()); - vkapi::ParamsBindList ubos; + std::vector pcs; if (graph.is_buffer_storage(tensor)) { - ubos.append( - {graph.sizes_ubo(tensor), - graph.strides_ubo(tensor), - graph.numel_ubo(tensor)}); + pcs = { + graph.sizes_pc_of(tensor), + graph.strides_pc_of(tensor), + graph.numel_pc_of(tensor)}; } else { - ubos.append({graph.sizes_ubo(tensor)}); + pcs = {graph.sizes_pc_of(tensor)}; } int transpose_hw_spec = transpose_hw ? 1 : 0; @@ -148,9 +148,10 @@ void add_prepack_standard_node( tensor_data, tensor, // Parameter Buffers - ubos, + {}, // Specialization Constants - {graph.hashed_layout_of(tensor), transpose_hw_spec})); + {graph.hashed_layout_of(tensor), transpose_hw_spec}, + pcs)); } ValueRef prepack_standard( diff --git a/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp new file mode 100644 index 00000000000..423c9789d67 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +#include +#include +#include + +namespace vkcompute { + +/** + * Adds a transfer copy operation node to the compute graph. + * This function handles both SELECT and SLICE operations based on the + * transfer_type parameter. 
+ */ +void add_transfer_copy_node( + ComputeGraph& graph, + TransferType transfer_type, + const ValueRef in, + const ValueRef dim_ref, + const ValueRef index_or_start_ref, + const ValueRef end_ref, + const ValueRef step_ref, + const ValueRef out, + const std::vector& resize_args, + const ExecuteNode::ResizeFunction& resize_fn) { + int64_t ndim = graph.dim_of(in); + int64_t dim = graph.extract_scalar(dim_ref); + + if (dim < 0) { + dim += ndim; + } + + int64_t dim_whcn = nchw_dim_to_whcn_dim(dim, ndim); + + vkapi::ParamsBindList param_buffers; + if (transfer_type == TransferType::SELECT) { + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0)}; + } else { // TransferType::SLICE + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0), + graph.get_or_create_int_param_buffer(step_ref, 1)}; + } + + const struct TransferParams { + const int32_t dim; + } transfer_params{static_cast(dim_whcn)}; + + std::vector push_constants; + vkapi::SpecVarList spec_vars; + + if (graph.is_buffer_storage(out)) { + push_constants = { + graph.sizes_pc_of(in), + graph.strides_pc_of(out), + graph.strides_pc_of(in), + graph.numel_pc_of(out), + PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + + spec_vars = { + graph.packed_dim_of(out), + graph.packed_dim_of(in), + }; + } else { + push_constants = { + graph.sizes_pc_of(out), + graph.sizes_pc_of(in), + PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + + spec_vars = { + graph.hashed_layout_of(out), + graph.hashed_layout_of(in), + }; + } + + // Determine the shader directly + std::string kernel_name; + if (transfer_type == TransferType::SELECT) { + kernel_name = "select"; + } else { // TransferType::SLICE + kernel_name = "slice"; + } + add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); + add_dtype_suffix(kernel_name, graph.dtype_of(out)); + + // Create and add the dispatch node + graph.execute_nodes().emplace_back(new DynamicDispatchNode( + 
graph, + VK_KERNEL_FROM_STR(kernel_name), + default_pick_global_wg_size, + default_pick_local_wg_size, + // Inputs and Outputs + {{out, vkapi::kWrite}, {in, vkapi::kRead}}, + // Parameter buffers + param_buffers, + // Push Constants + push_constants, + // Specialization Constants + spec_vars, + // Resize Args + resize_args, + // Resizing Logic + resize_fn)); +} + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Transfer.h b/backends/vulkan/runtime/graph/ops/impl/Transfer.h new file mode 100644 index 00000000000..09aae144994 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Transfer.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace vkcompute { + +enum class TransferType { SELECT, SLICE }; + +/** + * Adds a transfer copy operation node to the compute graph, which implements + * operators for which each element of the output tensor maps to a unique + * element of the input tensor. + * + * This function currently handles the following operations: + * - select + * - slice + */ +void add_transfer_copy_node( + ComputeGraph& graph, + TransferType transfer_type, + const ValueRef in, + const ValueRef dim_ref, + const ValueRef index_or_start_ref, + const ValueRef end_ref, + const ValueRef step_ref, + const ValueRef out, + const std::vector& resize_args, + const ExecuteNode::ResizeFunction& resize_fn = nullptr); + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Where.cpp b/backends/vulkan/runtime/graph/ops/impl/Where.cpp new file mode 100644 index 00000000000..a3be34830d3 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Where.cpp @@ -0,0 +1,126 @@ +// Where.cpp + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include + +namespace vkcompute { + +void resize_where_node( + ComputeGraph* graph, + const std::vector& args, + const std::vector& extra_args) { + (void)extra_args; + vTensorPtr out = graph->get_tensor(args[0].refs[0]); + vTensorPtr in = graph->get_tensor(args[1].refs[0]); + + std::vector in_sizes = in->sizes(); + out->virtual_resize(in_sizes); +} + +void add_where_texture_node( + ComputeGraph& graph, + const ValueRef cond, + const ValueRef self, + const ValueRef other, + const ValueRef out) { + std::string kernel_name = "where"; + + add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); + add_dtype_suffix(kernel_name, graph.dtype_of(out)); + + const utils::uvec3 global_wg_size = graph.create_global_wg_size(out); + const utils::uvec3 local_wg_size = graph.create_local_wg_size(global_wg_size); + + graph.execute_nodes().emplace_back(new DispatchNode( + graph, + // Shader + VK_KERNEL_FROM_STR(kernel_name), + // Workgroup sizes + global_wg_size, + local_wg_size, + // Inputs and Outputs + {{out, vkapi::kWrite}, {{cond, self, other}, vkapi::kRead}}, + // Parameter buffers + {graph.logical_limits_ubo(self)}, + // Push Constants + {}, + // Specialization Constants + {graph.packed_dim_of(out)}, + // Resize Arguments + {}, + // Resizing Logic + resize_where_node)); +} + +void add_where_buffer_node( + ComputeGraph& graph, + const ValueRef cond, + const ValueRef self, + const ValueRef other, + const ValueRef out) { + std::string kernel_name = "where"; + + add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); + add_dtype_suffix(kernel_name, graph.dtype_of(out)); + + const utils::uvec3 global_wg_size = graph.create_global_wg_size(out); + const utils::uvec3 local_wg_size = graph.create_local_wg_size(global_wg_size); + + vkapi::ParamsBindList ubos = { + graph.numel_ubo(out), + graph.strides_ubo(out), + 
graph.strides_ubo(cond), + graph.strides_ubo(self), + graph.strides_ubo(other)}; + + graph.execute_nodes().emplace_back(new DispatchNode( + graph, + // Shader + VK_KERNEL_FROM_STR(kernel_name), + // Workgroup sizes + global_wg_size, + local_wg_size, + // Inputs and Outputs + {{out, vkapi::kWrite}, {{cond, self, other}, vkapi::kRead}}, + // Parameter buffers + ubos, + // Push Constants + {}, + // Specialization Constants + {graph.packed_dim_of(out), + graph.packed_dim_of(cond), + graph.packed_dim_of(self), + graph.packed_dim_of(other)}, + // Resize Arguments + {}, + // Resizing Logic + resize_where_node)); +} + +void where(ComputeGraph& graph, const std::vector& args) { + int args_i = 0; + const ValueRef cond = args[args_i++]; + const ValueRef self = args[args_i++]; + const ValueRef other = args[args_i++]; + const ValueRef out = args[args_i++]; + if (graph.is_buffer_storage(out)) { + add_where_buffer_node(graph, cond, self, other, out); + } else { + add_where_texture_node(graph, cond, self, other, out); + } +} + +REGISTER_OPERATORS { + VK_REGISTER_OP(aten.where.self, where); +} + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp index 469c2ed8280..e1ac4e9d40a 100644 --- a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp @@ -49,6 +49,7 @@ void add_dtype_suffix(std::string& kernel_name, const vkapi::ScalarType dtype) { break; case vkapi::kByte: case vkapi::kQUInt8: + case vkapi::kBool: kernel_name += "_uint8"; break; default: diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp index fd7e6b78c22..6f3660fb0fc 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp @@ -22,13 +22,17 @@ bool is_bitw8(vkapi::ScalarType dtype) { 
vkapi::ShaderInfo get_nchw_to_tensor_shader( const api::vTensor& v_dst, - const bool int8_buffer_enabled) { + bool int8_buffer_enabled, + bool push_constant_variant) { std::string kernel_name; kernel_name.reserve(kShaderNameReserve); if (is_bitw8(v_dst.dtype()) && v_dst.storage_type() != utils::kBuffer && !int8_buffer_enabled) { kernel_name = "nchw_to_bitw8_image_nobitw8buffer"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_storage_type_suffix(kernel_name, v_dst); add_dtype_suffix(kernel_name, v_dst); return VK_KERNEL_FROM_STR(kernel_name); @@ -36,11 +40,17 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader( if (v_dst.storage_type() == utils::kBuffer) { kernel_name = "nchw_to_buffer"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_dtype_suffix(kernel_name, v_dst); return VK_KERNEL_FROM_STR(kernel_name); } kernel_name = "nchw_to_image"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_storage_type_suffix(kernel_name, v_dst); add_dtype_suffix(kernel_name, v_dst); diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h index 8d63958a738..6abbac45823 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h @@ -14,7 +14,8 @@ namespace vkcompute { vkapi::ShaderInfo get_nchw_to_tensor_shader( const api::vTensor& v_dst, - bool int8_buffer_enabled = true); + bool int8_buffer_enabled = true, + bool push_constant_variant = true); vkapi::ShaderInfo get_tensor_to_nchw_shader( const api::vTensor& v_src, bool int8_buffer_enabled = true); diff --git a/backends/vulkan/runtime/vk_api/Types.h b/backends/vulkan/runtime/vk_api/Types.h index 7191409c215..6531bf4710c 100644 --- a/backends/vulkan/runtime/vk_api/Types.h +++ b/backends/vulkan/runtime/vk_api/Types.h @@ -27,7 +27,7 @@ _(uint8_t, VK_FORMAT_R8G8B8A8_UINT, Byte) \ _(int8_t, VK_FORMAT_R8G8B8A8_SINT, Char) \ _(int32_t, 
VK_FORMAT_R32G32B32A32_SINT, Int) \ - _(bool, VK_FORMAT_R8G8B8A8_SINT, Bool) \ + _(uint8_t, VK_FORMAT_R8G8B8A8_UINT, Bool) \ _(uint16_t, VK_FORMAT_R16G16B16A16_SFLOAT, Half) \ _(float, VK_FORMAT_FLOAT4, Float) \ _(int8_t, VK_FORMAT_R8G8B8A8_SINT, QInt8) \ diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py index 4a12f16bbf9..bf6e9683ef7 100644 --- a/backends/vulkan/test/op_tests/cases.py +++ b/backends/vulkan/test/op_tests/cases.py @@ -499,7 +499,9 @@ def get_ones_inputs(): def get_select_int_inputs(): test_suite = VkTestSuite( [ - ((6, 2, 7), 0, 3), + ((8, 8, 8), 0, -2), + ((8, 8, 8), 1, -3), + ((8, 8, 8), 2, -4), ((6, 2, 7), 1, 0), ((6, 2, 7), 2, 3), ((6, 10, 7), 0, 3), @@ -515,6 +517,10 @@ def get_select_int_inputs(): ((8, 6, 1, 1), 1, 4), ] ) + test_suite.layouts = ["utils::kWidthPacked", "utils::kChannelsPacked"] + test_suite.storage_types = ["utils::kBuffer", "utils::kTexture3D"] + test_suite.dtypes = ["at::kFloat"] + test_suite.data_gen = "make_seq_tensor" return test_suite @@ -1147,6 +1153,7 @@ def get_reduce_op_inputs(): "aten.hardsigmoid.default", "aten.leaky_relu.default", "aten.round.default", + "aten.tan.default", ] ) def get_unary_ops_inputs(): @@ -1349,3 +1356,28 @@ def get_flip_inputs(): test_suite = VkTestSuite([tuple(tc) for tc in test_cases]) return test_suite + + +@register_test_suite("aten.where.self") +def get_where_inputs(): + Test = namedtuple("Where", ["condition", "self", "other"]) + Test.__new__.__defaults__ = (None, None, None) + + test_cases = [ + Test(condition=[11], self=[11], other=[11]), + Test(condition=[10, 9], self=[10, 9], other=[10, 9]), + Test(condition=[10, 5, 3], self=[10, 5, 3], other=[10, 5, 3]), + Test(condition=[2, 10, 5, 3], self=[2, 10, 5, 3], other=[2, 10, 5, 3]), + ] + + test_suite = VkTestSuite([tuple(tc) for tc in test_cases]) + test_suite.arg_dtype["condition"] = "at::kBool" + test_suite.layouts = [ + "utils::kWidthPacked", + "utils::kHeightPacked", + 
"utils::kChannelsPacked", + ] + test_suite.storage_types = ["utils::kTexture3D", "utils::kBuffer"] + test_suite.atol = "1e-4" + test_suite.rtol = "1e-4" + return test_suite diff --git a/backends/vulkan/test/op_tests/utils/gen_correctness_base.py b/backends/vulkan/test/op_tests/utils/gen_correctness_base.py index e6ce135736b..5be4ddba6bf 100644 --- a/backends/vulkan/test/op_tests/utils/gen_correctness_base.py +++ b/backends/vulkan/test/op_tests/utils/gen_correctness_base.py @@ -282,12 +282,16 @@ def generate_suite_cpp(self) -> str: at::ScalarType dtype = at::kFloat, float low = 0.0, float high = 1.0) {{ - if (high == 1.0 && low == 0.0) - return at::rand(sizes, at::device(at::kCPU).dtype(dtype)); if (dtype == at::kChar) return at::randint(high, sizes, at::device(at::kCPU).dtype(dtype)); + if (dtype == at::kBool) + return at::rand(sizes, at::device(at::kCPU)) > 0.5; + + if (high == 1.0 && low == 0.0) + return at::rand(sizes, at::device(at::kCPU).dtype(dtype)); + return at::rand(sizes, at::device(at::kCPU).dtype(dtype)) * (high - low) + low; }} diff --git a/backends/vulkan/test/op_tests/utils/gen_correctness_vk.py b/backends/vulkan/test/op_tests/utils/gen_correctness_vk.py index 6c165a777db..ce6ab32ce60 100644 --- a/backends/vulkan/test/op_tests/utils/gen_correctness_vk.py +++ b/backends/vulkan/test/op_tests/utils/gen_correctness_vk.py @@ -119,6 +119,8 @@ def gen_parameterization(self) -> str: return vkapi::kInt; case c10::kChar: return vkapi::kChar; + case c10::kBool: + return vkapi::kBool; default: VK_THROW("Unsupported at::ScalarType!"); } diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py index 80ead02de9f..447e5d039f4 100644 --- a/backends/vulkan/test/test_vulkan_delegate.py +++ b/backends/vulkan/test/test_vulkan_delegate.py @@ -1842,3 +1842,50 @@ def forward(self, x): dynamic_shapes=dynamic_shapes, test_inputs=test_inputs, ) + + def test_select_last_height_dynamic_shapes(self): + """ + Test selecting the last 
element along the height dimension with dynamic shapes. + The height dimension (dim=1) is variable. + """ + + class SelectLastHeightModule(torch.nn.Module): + """ + Module that selects the last element along the height dimension (dim=1) of a 3D tensor. + This is equivalent to the operation: x[:, -1, :] + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + # Select the last element along dimension 1 (height) + return x[:, -1, :] + + # Create the module + module = SelectLastHeightModule() + + # Create sample inputs with a specific shape + # Shape: [batch_size, height, width] + sample_inputs = (torch.arange(1, 61).reshape(2, 10, 3).float(),) + + # Define dynamic shapes for the height dimension + height = Dim("height", min=1, max=10) + dynamic_shapes = {"x": {1: height}} + + # Create test inputs with different heights + test_inputs = [ + (torch.arange(1, 7).reshape(2, 1, 3).float(),), # Minimum height + (torch.arange(1, 19).reshape(2, 3, 3).float(),), # Small height + (torch.arange(1, 43).reshape(2, 7, 3).float(),), # Medium height + (torch.arange(1, 31).reshape(2, 5, 3).float(),), # Maximum height + ] + + # Use the testing infrastructure from TestVulkanBackend + test_backend = TestVulkanBackend() + test_backend.lower_module_and_test_output( + module, + sample_inputs, + dynamic_shapes=dynamic_shapes, + test_inputs=test_inputs, + ) diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index c4acb41b7b0..dcd8c425d62 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -28,7 +28,7 @@ void record_nchw_to_buffer_op( vkapi::PipelineBarrier pipeline_barrier{}; context->submit_compute_job( - get_nchw_to_tensor_shader(v_dst), + get_nchw_to_tensor_shader(v_dst, true, false), pipeline_barrier, {uint32_t(v_dst.numel()), 1, 1}, {64, 1, 1}, @@ -74,7 +74,9 @@ void record_nchw_to_image_op( context->submit_compute_job( get_nchw_to_tensor_shader( - v_dst, 
context->adapter_ptr()->has_full_int8_buffers_support()), + v_dst, + context->adapter_ptr()->has_full_int8_buffers_support(), + false), pipeline_barrier, v_dst.logical_limits(), adaptive_work_group_size(v_dst.logical_limits()), diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index a6475d95d07..85811aaaf11 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -1600,8 +1601,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { /*shared_object_idx = */ 4); // +2: t.sizes_ubo() for each staging shader - // +2: staging buffer for each input tensor - expected_vma_allocation_count += 4; + expected_vma_allocation_count += 2; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); ValueRef c = graph.add_tensor( @@ -1621,8 +1621,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { /*shared_object_idx = */ 2); // +1: t.sizes_ubo() uniform buffer for staging shader - // +1: staging buffer for the input tensor - expected_vma_allocation_count += 2; + expected_vma_allocation_count += 1; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); ValueRef e = graph.add_tensor( @@ -1660,9 +1659,8 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { for (auto& new_sizes : new_sizes_list) { graph.get_tensor(a.value)->virtual_resize(new_sizes); graph.get_tensor(b.value)->virtual_resize(new_sizes); - graph.get_tensor(c)->virtual_resize(new_sizes); graph.get_tensor(d.value)->virtual_resize(new_sizes); - graph.get_tensor(e)->virtual_resize(new_sizes); + graph.propagate_resize(); float val_a = new_sizes[1] + 4.0f; float val_b = new_sizes[2] + 1.5f; @@ -3315,17 +3313,23 @@ vkapi::ShaderInfo pick_dynamic_dispatch_shader( utils::uvec3 pick_dynamic_dispatch_global_wg_size( ComputeGraph* graph, + const 
vkapi::ShaderInfo& shader, const std::vector& args, - const std::vector& additional_args) { + const std::vector& resize_args) { + (void)shader; const ValueRef out = args[0].refs[0]; - return graph->logical_limits_of(out); } utils::uvec3 pick_dynamic_dispatch_local_wg_size( ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const utils::uvec3& global_workgroup_size, const std::vector& args, - const std::vector& additional_args) { + const std::vector& resize_args) { + (void)graph; + (void)shader; + (void)global_workgroup_size; return {64, 1, 1}; } diff --git a/backends/xnnpack/CMakeLists.txt b/backends/xnnpack/CMakeLists.txt index a1fee7427fc..b6ba211ecb3 100644 --- a/backends/xnnpack/CMakeLists.txt +++ b/backends/xnnpack/CMakeLists.txt @@ -61,7 +61,10 @@ foreach(fbs_file ${_xnnpack_schema__srcs}) endforeach() if(WIN32) - set(MV_COMMAND powershell -Command "Move-Item -Path ${_xnnpack_flatbuffer__outputs} -Destination ${_xnnpack_schema__outputs}") + set(MV_COMMAND + powershell -Command + "Move-Item -Path ${_xnnpack_flatbuffer__outputs} -Destination ${_xnnpack_schema__outputs}" + ) else() set(MV_COMMAND mv ${_xnnpack_flatbuffer__outputs} ${_xnnpack_schema__outputs}) endif() @@ -96,7 +99,8 @@ include(cmake/Dependencies.cmake) list(TRANSFORM _xnnpack_backend__srcs PREPEND "${EXECUTORCH_ROOT}/") add_library(xnnpack_backend ${_xnnpack_backend__srcs}) target_link_libraries( - xnnpack_backend PUBLIC ${xnnpack_third_party} executorch_core xnnpack_schema extension_threadpool + xnnpack_backend PUBLIC ${xnnpack_third_party} executorch_core xnnpack_schema + extension_threadpool ) target_include_directories( @@ -114,46 +118,8 @@ target_include_directories( target_compile_options(xnnpack_backend PUBLIC ${_common_compile_options}) target_link_options_shared_lib(xnnpack_backend) -if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED) - list(APPEND xnn_executor_runner_libs optimized_native_cpu_ops_lib) -else() - list(APPEND xnn_executor_runner_libs portable_ops_lib) -endif() - 
-if(EXECUTORCH_BUILD_KERNELS_CUSTOM) - list(APPEND xnn_executor_runner_libs $) -endif() - -if(EXECUTORCH_BUILD_KERNELS_QUANTIZED) - list(APPEND xnn_executor_runner_libs quantized_ops_lib) -endif() - -list(APPEND xnn_executor_runner_libs xnnpack_backend executorch) - -# ios can only build library but not binary -if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$") - # - # xnn_executor_runner: Like executor_runner but with XNNPACK, the binary will - # be at ${CMAKE_BINARY_DIR}/backends/xnnpack - # - list(TRANSFORM _xnn_executor_runner__srcs PREPEND "${EXECUTORCH_ROOT}/") - add_executable(xnn_executor_runner ${_xnn_executor_runner__srcs}) - - if(EXECUTORCH_ENABLE_EVENT_TRACER) - list(APPEND xnn_executor_runner_libs etdump) - endif() - - target_link_libraries(xnn_executor_runner gflags ${xnn_executor_runner_libs}) - target_compile_options(xnn_executor_runner PUBLIC ${_common_compile_options}) - if(EXECUTORCH_BUILD_PTHREADPOOL) - target_link_libraries(xnn_executor_runner extension_threadpool pthreadpool) - target_compile_definitions(xnn_executor_runner PRIVATE ET_USE_THREADPOOL) - endif() -endif() - install( TARGETS xnnpack_backend - DESTINATION lib INCLUDES DESTINATION ${_common_include_directories} ) diff --git a/backends/xnnpack/README.md b/backends/xnnpack/README.md index 2328f8e4b90..411bec99d79 100644 --- a/backends/xnnpack/README.md +++ b/backends/xnnpack/README.md @@ -92,7 +92,7 @@ After lowering to the XNNPACK Program, we can then prepare it for executorch and ### Running the XNNPACK Model with CMake -After exporting the XNNPACK Delegated model, we can now try running it with example inputs using CMake. We can build and use the xnn_executor_runner, which is a sample wrapper for the ExecuTorch Runtime and XNNPACK Backend. We first begin by configuring the CMake build like such: +After exporting the XNNPACK Delegated model, we can now try running it with example inputs using CMake. 
We can build and use the executor_runner, which is a sample wrapper for the ExecuTorch Runtime. The XNNPACK Backend is enabled via the compilation flag `-DEXECUTORCH_BUILD_XNNPACK=ON`. We first begin by configuring the CMake build like such: ```bash # cd to the root of executorch repo cd executorch @@ -119,9 +119,9 @@ Then you can build the runtime componenets with cmake --build cmake-out -j9 --target install --config Release ``` -Now you should be able to find the executable built at `./cmake-out/backends/xnnpack/xnn_executor_runner` you can run the executable with the model you generated as such +Now you should be able to find the executable built at `./cmake-out/executor_runner` you can run the executable with the model you generated as such ```bash -./cmake-out/backends/xnnpack/xnn_executor_runner --model_path=./mv2_xnnpack_fp32.pte +./cmake-out/executor_runner --model_path=./mv2_xnnpack_fp32.pte ``` ## Help & Improvements diff --git a/backends/xnnpack/operators/node_visitor.py b/backends/xnnpack/operators/node_visitor.py index 8470184d808..b7d16b18bd1 100644 --- a/backends/xnnpack/operators/node_visitor.py +++ b/backends/xnnpack/operators/node_visitor.py @@ -274,19 +274,46 @@ def get_per_channel_dtype( return dtype - def get_quant_params(self, quant_params: QuantParams) -> XNNQuantParams: + def get_quant_params( + self, quant_params: QuantParams, xnn_graph: XNNGraph + ) -> XNNQuantParams: if quant_params.per_channel: scale = cast(torch.Tensor, quant_params.scale) + buffer_idx = len(xnn_graph.constant_data) + num_scales = scale.numel() + + if quant_params.is_per_channel_group: + scale = scale.to(torch.bfloat16) + + num_bytes = scale.untyped_storage().nbytes() + scale_array = ctypes.cast( + scale.untyped_storage().data_ptr(), + ctypes.POINTER(ctypes.c_char * num_bytes), + ).contents + scale_name = hashlib.sha256(bytes(scale_array)).hexdigest() + xnn_graph.constant_data.append( + ConstantDataOffset( + offset=UINT64_MAX, size=num_bytes, named_key=scale_name + ) + 
) + self._named_data_store.add_named_data( + scale_name, bytes(scale_array), CONSTANT_TENSOR_ALIGNMENT + ) + if quant_params.is_per_channel_group: return PerChannelGroupQuant( - scale=scale.flatten().tolist(), + scale=[], channel_dim=quant_params.axis, group_size=quant_params.group_size, + scale_buffer_idx=buffer_idx, + num_scales=num_scales, ) - else: # per_channel quant + else: return PerChannelQuant( - scale=scale.tolist(), + scale=[], channel_dim=quant_params.axis, + scale_buffer_idx=buffer_idx, + num_scales=num_scales, ) elif quant_params.is_dynamic: # NB: @@ -449,7 +476,7 @@ def define_tensor( # noqa: C901 else XValue( xvalue_union=XNNQuantizedTensorValue( tensor_value=tvalue, - quant_params=self.get_quant_params(quant_params), + quant_params=self.get_quant_params(quant_params, xnn_graph), ) ) ) diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index 9fd2c55bb83..56d0508bef0 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -421,11 +421,32 @@ Error defineTensor( qparams->channel_dim(), dtype, zero_point); + + const float* scale = qparams->scale()->data(); + + if (qparams->scale_buffer_idx() != 0) { + // if scales are stored in named data, then retrieve it + ConstantDataOffsetPtr scale_buffer_offset = + flatbuffer_graph->constant_data()->Get( + qparams->scale_buffer_idx()); + const std::string& data_name = + scale_buffer_offset->named_key()->str(); + Result scale_buffer = + named_data_map->get_data(data_name.c_str()); + ET_CHECK_OR_RETURN_ERROR( + scale_buffer.ok(), + Internal, + "Failed to get constant data for key %s from named_data_map. 
Error code: %u", + data_name.c_str(), + static_cast(scale_buffer.error())); + scale = reinterpret_cast(scale_buffer.get().data()); + freeable_buffers.push_back(std::move(scale_buffer.get())); + } status = xnn_define_channelwise_quantized_tensor_value_v2( /*subgraph=*/subgraph_ptr, /*datatype=*/dtype, /*zero_point=*/zero_point, - /*scale=*/qparams->scale()->data(), + /*scale=*/scale, /*num_dims=*/tensor_value->num_dims(), /*channel_dim*/ qparams->channel_dim(), /*dims=*/dims_data.data(), @@ -452,10 +473,24 @@ Error defineTensor( // Block scales are preferably serialized as bf16 but can also be // serialized as fp32 for backwards compatability. - if (qparams->scale_bf16() != nullptr) { + if (qparams->scale_buffer_idx() != 0) { + ConstantDataOffsetPtr scale_buffer_offset = + flatbuffer_graph->constant_data()->Get( + qparams->scale_buffer_idx()); + const std::string& data_name = + scale_buffer_offset->named_key()->str(); + Result scale_buffer = + named_data_map->get_data(data_name.c_str()); + ET_CHECK_OR_RETURN_ERROR( + scale_buffer.ok(), + Internal, + "Failed to get constant data for key %s from named_data_map. Error code: %u", + data_name.c_str(), + static_cast(scale_buffer.error())); scale_data = - static_cast(qparams->scale_bf16()->data()); - scale_numel = qparams->scale_bf16()->size(); + reinterpret_cast(scale_buffer.get().data()); + freeable_buffers.push_back(std::move(scale_buffer.get())); + scale_numel = qparams->num_scales(); } else { // Read fp32 scales, convert to bf16. 
auto conv_buffer = static_cast(allocator.allocateTemporary( diff --git a/backends/xnnpack/serialization/runtime_schema.fbs b/backends/xnnpack/serialization/runtime_schema.fbs index 79502ad4e51..d76c3c0807e 100644 --- a/backends/xnnpack/serialization/runtime_schema.fbs +++ b/backends/xnnpack/serialization/runtime_schema.fbs @@ -48,6 +48,8 @@ table Buffer { table PerChannelQuant { scale:[float]; channel_dim:int; + scale_buffer_idx: uint; + num_scales: uint; } table PerTokenDynamicQuant { @@ -63,7 +65,9 @@ table PerChannelGroupQuant { scale:[float]; channel_dim:int; group_size:int; - scale_bf16:[ushort]; + scale_bf16:[ushort] (deprecated); + scale_buffer_idx: uint; + num_scales: uint; } table XNNTensorValue { diff --git a/backends/xnnpack/serialization/schema.fbs b/backends/xnnpack/serialization/schema.fbs index a231ed05c5d..356df663dfc 100644 --- a/backends/xnnpack/serialization/schema.fbs +++ b/backends/xnnpack/serialization/schema.fbs @@ -48,12 +48,16 @@ table PerChannelGroupQuant { scale:[float]; channel_dim:int; group_size:int; - scale_bf16:[ushort]; + scale_bf16:[ushort] (deprecated); + scale_buffer_idx: uint; + num_scales: uint; } table PerChannelQuant { scale:[float]; channel_dim:int; + scale_buffer_idx: uint; + num_scales: uint; } table PerTokenDynamicQuant { diff --git a/backends/xnnpack/serialization/xnnpack_graph_schema.py b/backends/xnnpack/serialization/xnnpack_graph_schema.py index 3a39fe98279..b8b4ea7f02f 100644 --- a/backends/xnnpack/serialization/xnnpack_graph_schema.py +++ b/backends/xnnpack/serialization/xnnpack_graph_schema.py @@ -425,6 +425,13 @@ class XNNDatatype(IntEnum): class PerChannelQuant: scale: List[float] channel_dim: int + scale_buffer_idx: int = -1 + num_scales: int = -1 + + +@dataclass +class Buffer: + storage: bytes @dataclass @@ -432,6 +439,9 @@ class PerChannelGroupQuant: scale: List[float] channel_dim: int group_size: int = 1 + scale_bf16: Optional[List[float]] = None + scale_buffer_idx: int = -1 + num_scales: int = -1 @dataclass 
diff --git a/backends/xnnpack/third-party/cpuinfo b/backends/xnnpack/third-party/cpuinfo index 1e83a2fdd31..c61fe919607 160000 --- a/backends/xnnpack/third-party/cpuinfo +++ b/backends/xnnpack/third-party/cpuinfo @@ -1 +1 @@ -Subproject commit 1e83a2fdd3102f65c6f1fb602c1b320486218a99 +Subproject commit c61fe919607bbc534d7a5a5707bdd7041e72c5ff diff --git a/codegen/api/et_cpp.py b/codegen/api/et_cpp.py index 5703af89c5a..88f1eb83fe0 100644 --- a/codegen/api/et_cpp.py +++ b/codegen/api/et_cpp.py @@ -2,15 +2,6 @@ from typing import TYPE_CHECKING -from executorch.codegen.api.types import ( - ArrayRefCType, - BaseTypeToCppMapping, - OptionalCType, - scalarT, - tensorListT, - tensorT, -) - from torchgen import local from torchgen.api.types import ( ArgName, @@ -40,6 +31,15 @@ ) from typing_extensions import assert_never +from .types import ( + ArrayRefCType, + BaseTypeToCppMapping, + OptionalCType, + scalarT, + tensorListT, + tensorT, +) + if TYPE_CHECKING: from collections.abc import Sequence diff --git a/codegen/api/types/__init__.py b/codegen/api/types/__init__.py index 9de50ae744a..628c0637ced 100644 --- a/codegen/api/types/__init__.py +++ b/codegen/api/types/__init__.py @@ -1,5 +1,5 @@ # flake8: noqa: F403, F401 -from executorch.codegen.api.types.types import * +from .types import * # flake8: noqa: F403, F401 -from executorch.codegen.api.types.signatures import * # usort: skip +from .signatures import * # usort: skip diff --git a/codegen/api/types/signatures.py b/codegen/api/types/signatures.py index 0b41b227c4e..6342a3f7a5e 100644 --- a/codegen/api/types/signatures.py +++ b/codegen/api/types/signatures.py @@ -4,7 +4,8 @@ from typing import TYPE_CHECKING import torchgen.api.cpp as aten_cpp -from executorch.codegen.api.types.types import contextArg + +from .types import contextArg if TYPE_CHECKING: @@ -73,4 +74,4 @@ def from_native_function( ) -from executorch.codegen.api import et_cpp +from .. 
import et_cpp diff --git a/codegen/gen.py b/codegen/gen.py index 43dc296a317..0dc1a167712 100644 --- a/codegen/gen.py +++ b/codegen/gen.py @@ -8,15 +8,32 @@ from typing import Any, Callable, TextIO, TYPE_CHECKING import yaml -from executorch.codegen.api import et_cpp -from executorch.codegen.api.custom_ops import ( - ComputeNativeFunctionStub, - gen_custom_ops_registration, -) -from executorch.codegen.api.types import contextArg, ExecutorchCppSignature -from executorch.codegen.api.unboxing import Unboxing -from executorch.codegen.model import ETKernelIndex, ETKernelKey, ETParsedYaml -from executorch.codegen.parse import ET_FIELDS, parse_et_yaml, parse_et_yaml_struct + +try: + from executorch.codegen.api import et_cpp + from executorch.codegen.api.custom_ops import ( + ComputeNativeFunctionStub, + gen_custom_ops_registration, + ) + from executorch.codegen.api.types import contextArg, ExecutorchCppSignature + from executorch.codegen.api.unboxing import Unboxing + from executorch.codegen.model import ETKernelIndex, ETKernelKey, ETParsedYaml + from executorch.codegen.parse import ET_FIELDS, parse_et_yaml, parse_et_yaml_struct +except ImportError: + # If we build from source, executorch.codegen is not available. + from .api import et_cpp # type: ignore[no-redef] + from .api.custom_ops import ( # type: ignore + ComputeNativeFunctionStub, + gen_custom_ops_registration, + ) + from .api.types import contextArg, ExecutorchCppSignature # type: ignore + from .api.unboxing import Unboxing # type: ignore + from .model import ETKernelIndex, ETKernelKey, ETParsedYaml # type: ignore + from .parse import ( # type: ignore[no-redef] + ET_FIELDS, + parse_et_yaml, + parse_et_yaml_struct, + ) # Parse native_functions.yaml into a sequence of NativeFunctions and Backend Indices. 
from torchgen import dest diff --git a/codegen/test/test_executorch_custom_ops.py b/codegen/test/test_executorch_custom_ops.py index 847f87ab352..67dccc3a8c1 100644 --- a/codegen/test/test_executorch_custom_ops.py +++ b/codegen/test/test_executorch_custom_ops.py @@ -15,8 +15,8 @@ import torchgen from executorch.codegen.api.custom_ops import ComputeNativeFunctionStub +from executorch.codegen.gen import gen_headers from executorch.codegen.model import ETKernelIndex -from torchgen.gen_executorch import gen_headers from torchgen.model import Location, NativeFunction from torchgen.selective_build.selector import SelectiveBuilder from torchgen.utils import FileManager diff --git a/codegen/test/test_executorch_gen.py b/codegen/test/test_executorch_gen.py index 23dcbecf64a..30c82254de7 100644 --- a/codegen/test/test_executorch_gen.py +++ b/codegen/test/test_executorch_gen.py @@ -11,15 +11,15 @@ import unittest import yaml - -from executorch.codegen.model import ETKernelIndex, ETKernelKey -from torchgen.gen import LineLoader -from torchgen.gen_executorch import ( +from executorch.codegen.gen import ( ComputeCodegenUnboxedKernels, gen_functions_declarations, parse_yaml_files, translate_native_yaml, ) + +from executorch.codegen.model import ETKernelIndex, ETKernelKey +from torchgen.gen import LineLoader from torchgen.model import ( BackendIndex, BackendMetadata, diff --git a/codegen/tools/gen_oplist.py b/codegen/tools/gen_oplist.py index b1f4af02889..3d26797fb24 100644 --- a/codegen/tools/gen_oplist.py +++ b/codegen/tools/gen_oplist.py @@ -20,6 +20,7 @@ # We can use relative import instead. 
from ..parse import strip_et_fields + from torchgen.gen import LineLoader, parse_native_yaml_struct from torchgen.selective_build.operator import SelectiveBuildOperator from torchgen.selective_build.selector import merge_et_kernel_metadata diff --git a/devtools/etrecord/_etrecord.py b/devtools/etrecord/_etrecord.py index de7cf93990a..d5ad81fe255 100644 --- a/devtools/etrecord/_etrecord.py +++ b/devtools/etrecord/_etrecord.py @@ -29,6 +29,7 @@ from executorch.exir.serde.export_serialize import SerializedArtifact from executorch.exir.serde.serialize import deserialize, serialize +ProgramInput = List[Value] ProgramOutput = List[Value] try: @@ -49,6 +50,7 @@ class ETRecordReservedFileNames(StrEnum): DEBUG_HANDLE_MAP_NAME = "debug_handle_map" DELEGATE_MAP_NAME = "delegate_map" REFERENCE_OUTPUTS = "reference_outputs" + REPRESENTATIVE_INPUTS = "representative_inputs" @dataclass @@ -60,6 +62,7 @@ class ETRecord: Dict[str, Dict[int, Dict[str, Union[str, _DelegateDebugIdentifierMap]]]] ] = None _reference_outputs: Optional[Dict[str, List[ProgramOutput]]] = None + _representative_inputs: Optional[List[ProgramOutput]] = None def _handle_exported_program( @@ -157,6 +160,24 @@ def _get_reference_outputs( return reference_outputs +def _get_representative_inputs( + bundled_program: BundledProgram, +) -> List[ProgramInput]: + """ + Extracts out the inputs from the bundled program, keyed by the method names. + """ + for method_test_suite in bundled_program.method_test_suites: + if method_test_suite.method_name == "forward": + if not method_test_suite.test_cases: + raise ValueError( + "The 'forward' method is defined, but no corresponding input test cases are provided." 
+ ) + # Get first example input from the forward method + test_case = method_test_suite.test_cases[0] + return test_case.inputs + raise ValueError("No 'forward' method found in the bundled program.") + + def generate_etrecord( et_record: Union[str, os.PathLike, BinaryIO, IO[bytes]], edge_dialect_program: Union[EdgeProgramManager, ExirExportedProgram], @@ -244,6 +265,13 @@ def generate_etrecord( # @lint-ignore PYTHONPICKLEISBAD pickle.dumps(reference_outputs), ) + + representative_inputs = _get_representative_inputs(executorch_program) + etrecord_zip.writestr( + ETRecordReservedFileNames.REPRESENTATIVE_INPUTS, + # @lint-ignore PYTHONPICKLEISBAD + pickle.dumps(representative_inputs), + ) executorch_program = executorch_program.executorch_program etrecord_zip.writestr( @@ -290,6 +318,7 @@ def parse_etrecord(etrecord_path: str) -> ETRecord: # noqa: C901 delegate_map = None edge_dialect_program = None reference_outputs = None + representative_inputs = None serialized_exported_program_files = set() serialized_state_dict_files = set() @@ -321,6 +350,11 @@ def parse_etrecord(etrecord_path: str) -> ETRecord: # noqa: C901 reference_outputs = pickle.loads( etrecord_zip.read(ETRecordReservedFileNames.REFERENCE_OUTPUTS) ) + elif entry == ETRecordReservedFileNames.REPRESENTATIVE_INPUTS: + # @lint-ignore PYTHONPICKLEISBAD + representative_inputs = pickle.loads( + etrecord_zip.read(ETRecordReservedFileNames.REPRESENTATIVE_INPUTS) + ) else: if entry.endswith("state_dict"): serialized_state_dict_files.add(entry) @@ -352,4 +386,5 @@ def parse_etrecord(etrecord_path: str) -> ETRecord: # noqa: C901 _debug_handle_map=debug_handle_map, _delegate_map=delegate_map, _reference_outputs=reference_outputs, + _representative_inputs=representative_inputs, ) diff --git a/devtools/etrecord/tests/etrecord_test.py b/devtools/etrecord/tests/etrecord_test.py index cf50662c2a1..dd1d40e0292 100644 --- a/devtools/etrecord/tests/etrecord_test.py +++ b/devtools/etrecord/tests/etrecord_test.py @@ -19,6 
+19,7 @@ from executorch.devtools.etrecord import generate_etrecord, parse_etrecord from executorch.devtools.etrecord._etrecord import ( _get_reference_outputs, + _get_representative_inputs, ETRecordReservedFileNames, ) from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge @@ -135,15 +136,25 @@ def test_etrecord_generation_with_bundled_program(self): ) etrecord = parse_etrecord(tmpdirname + "/etrecord.bin") - expected = etrecord._reference_outputs - actual = _get_reference_outputs(bundled_program) + expected_inputs = etrecord._representative_inputs + actual_inputs = _get_representative_inputs(bundled_program) # assertEqual() gives "RuntimeError: Boolean value of Tensor with more than one value is ambiguous" when comparing tensors, # so we use torch.equal() to compare the tensors one by one. + for expected, actual in zip(expected_inputs, actual_inputs): + self.assertTrue(torch.equal(expected[0], actual[0])) + self.assertTrue(torch.equal(expected[1], actual[1])) + + expected_outputs = etrecord._reference_outputs + actual_outputs = _get_reference_outputs(bundled_program) self.assertTrue( - torch.equal(expected["forward"][0][0], actual["forward"][0][0]) + torch.equal( + expected_outputs["forward"][0][0], actual_outputs["forward"][0][0] + ) ) self.assertTrue( - torch.equal(expected["forward"][1][0], actual["forward"][1][0]) + torch.equal( + expected_outputs["forward"][1][0], actual_outputs["forward"][1][0] + ) ) def test_etrecord_generation_with_manager(self): diff --git a/devtools/inspector/_intermediate_output_capturer.py b/devtools/inspector/_intermediate_output_capturer.py index e3a904487eb..c1f943bd02c 100644 --- a/devtools/inspector/_intermediate_output_capturer.py +++ b/devtools/inspector/_intermediate_output_capturer.py @@ -7,24 +7,57 @@ # pyre-unsafe -from typing import Any, Dict, Tuple +from typing import Any, Dict, List, Tuple import torch from torch.fx import GraphModule from torch.fx.interpreter import Interpreter +class NodeFilter: + 
""" + A class used to filter nodes based on extensible criteria. + Attributes: + metadata_key (str): The key to look for in the node's metadata. + op_type (str): The operation code to match. + exclude_ops (List[str]): A list of operations to exclude from the filter. + """ + + def __init__(self, metadata_key: str, op_type: str, exclude_ops: List[str] = None): + self.metadata_key = metadata_key + self.op_type = op_type + self.exclude_ops = exclude_ops + + def matches(self, node: torch.fx.Node) -> bool: + return ( + node.meta.get(self.metadata_key) is not None + and node.op == self.op_type + and all(exclude_name not in node.name for exclude_name in self.exclude_ops) + ) + + class IntermediateOutputCapturer(Interpreter): + """ + A class that captures intermediate outputs from a PyTorch graph module. + Attributes: + module (GraphModule): The graph module to capture outputs from. + node_filters (List[NodeFilter]): A list of filters to apply to the nodes. + """ + def __init__(self, module: GraphModule): super().__init__(module) + self.node_filters = [ + NodeFilter("debug_handle", "call_function", exclude_ops=["getitem"]) + ] + # Runs the graph module and captures the intermediate outputs. 
def run_and_capture(self, *args, **kwargs) -> Dict[Tuple[int, ...], Any]: captured_outputs = {} def capture_run_node(n: torch.fx.Node) -> Any: result = super(IntermediateOutputCapturer, self).run_node(n) - debug_handle = n.meta.get("debug_handle", None) - if debug_handle is not None and n.op == "call_function": + if all(filter.matches(n) for filter in self.node_filters): + debug_handle = n.meta["debug_handle"] # Convert the debug handle to a tuple to use as a dictionary key key = ( (debug_handle,) diff --git a/devtools/inspector/tests/intermediate_output_capturer_test.py b/devtools/inspector/tests/intermediate_output_capturer_test.py index e6dd782d887..7ad673c7cfe 100644 --- a/devtools/inspector/tests/intermediate_output_capturer_test.py +++ b/devtools/inspector/tests/intermediate_output_capturer_test.py @@ -111,8 +111,6 @@ def test_capture_correct_outputs(self): (19,): torch.tensor([[3.6000, 4.5067]]), (20,): torch.tensor([[0.9734, 0.9891]]), (21,): [torch.tensor([[0.9734]]), torch.tensor([[0.9891]])], - (22,): torch.tensor([[0.9734]]), - (23,): torch.tensor([[0.9891]]), } self.assertEqual( len(self.intermediate_outputs), len(expected_outputs_with_handles) diff --git a/docs/source/_static/img/swiftpm_xcode2.png b/docs/source/_static/img/swiftpm_xcode2.png index fbe5d51fd4b98ecec043e5454e8e0d1cb5fc8d04..db811ddf05de2d1e69a6dffdff22220290b5eb63 100644 GIT binary patch literal 55550 zcmeFYbyQqSw=c@xaf4uuLxNjFun;@}8kgYESa65nPR9t+xVyUscSwK+g1ZFUbmJ0e zT$*0Dll`4@zH`oZ?ilaBJKkS!F<8A;Rn4loX3eVl&EKjO@>)gq(S6GMczAe^WKrGtf=$rCRJdq-DcFERSxC4_O;zmmD>pZqT3 zW+z6ktNi+jl#`3aljmH|xSr9A-+%JtiKvUYrLelR%s<3&cVhI`Zfb)UKaLx(l!nj zj;^>i#Cdpxctw8;{I{(CX!4&#b^l3}@7aG6{im$|Dyrpb;UeYafNRuET*J)80@t{; zliNSq{pZYo2#Rw5diH;^#ovbhor<%x_gsA@VuFv4PfScqS6BDt%a>hUT_-0e zFJ8P*R#v{g#%^qETw{U7#l@qeqsYie6&01n&e`JP;>O0t&d$!++1bs_&5Mf*0|Ns= zK|%UR;I==2LZQ^v)z8k(^7Hf8aLuf)zIpTJX*j^n&VF!k;O*`G?Af!vzCKAw$>!$f z(b3WU{r!c7g{rEm%gf8Z{`w2&RqJbOtLv*qMn>uB>2h*%KYskk%gbYBW#!`HDk>@} 
zD=TA(2I}kUOG``5%*+A<1DPX%p`oGP-rn%=a9&>CgHvE{Z||lb@P|Kete19pczAq# ztf!}!mXFH@FC#R^WsPXafnVA_? zRaF=awsiyu2nfi_%g4pV<>chHw6xrQ53oi7OcB8P+Pb5oV{23|oB9R|Ieq3H&c5`#13IP%m6RAQ0UteEybMxTf;BVi)J$?_|^Z}%$r7bNj z;c&Q>mDSwb+}pQrqobpl!-0l|hDRTPyD(s6WQ3K4MNLhOEgIO`+Vb-9QdCr&nwqk( zu*l5J{PN|Cjg8IqH9#5!xVyXGg<**T0T&mS(9lqOd;5zkfZ!9*-``Ij479hmYieq4 zqJiQe;Or7$iv%Xt0R#d;83@ot0PIl!VGy911tcdYBbI@qb3h~xh-e3rx`Ej(Kq?7f zVP*c@3{;K*e35`cDqvj-3?hNOV*r{9G|vG3^}zBTkkJdg-3LqxfVY*v>ON3A2{beT z9yLJ2G|<%4^lbsy*w{q8SA^l=O;pQEztZ&jv70`pN-~Gj((M}qJy`iDonMk8KsonS zy~@qoAD=w=^9WM=E$nf^ao_`=R||L_2N1IKUMq}@lv002bZ;slX=A@V5l%^Z!D!xk zxb6B~!X!YaqefFp$BC8QgJEBXcf2qIs-orDcA2)3wr#RTMS&ZODz@7_H|lP2a!kFq z`vVUzwE4zJBds$H9^T89H|`FNxQYDDo-Em4^uH!xymy;>5Q9H|U;cl1(p{wf&SyN_ z>V-zyhm{g{31hwyq@RI3_<39kR@GMSd+iTgP*|;i@V@q+5Z6D=*-j&tHFuUEOb4cW zH%pK~MI|M|p4DhqZGhQb7s=yZaUezJe|=9-DLU)TtyUn>TMUM9z-^cu{!8SC;-t(U_PA*A7%C7{~7U#+6{AI#&S-fj}yn)kv;zj`m)!jS7JETeL%RTQzmf0sivy1q+&YM2=2)}Yn* z@!;)yT>*CnSk?2+V!cWYLgQXyFG7QIM1Q+mo8PMVJH?W-C2NmQEGSda0--HK_`-bX(|CXrfAS@%6};_XH?QcRc3h)BDlzi6r(H;Zdl zL5f_bshb)R^l=WBw<|?h^Jij7<<-{uy)tag>J+wuYSvY-_pH&UQLXb7EhaJ!op53w z@ni2&uF4USf>BfWsEXpbyx*wu^A{u#_$$)dORL6lrJ;(DJcNsDSJ3B-RZ_mXO(!1h zBn7P&51*tRaln@aGx?!%!W3$-+W1X`5Xw0=pz5rMm(}oC>c}T2pA``M)?$kMoTF46 zosxXu(qYYtJvw8E{yLur6*4a+DpqhZy9vn;d1kiJ{nSVRu05bORvmrOky~w|X>IMC z7yFp91E3F*8xG6ldk_qsz_4-9fM5-$*g;srr*|=Ib5f zA%j9_+2?@kw->bzsk&or4*Iv|IqKy^*-O(3c0$Ua3ykIhZtKx6((!w!bG$SZ9BMdr zCnYO1fne@B zd!OtuQnp`p(hkIA`%-dc2Ck#94O#*;QmWuu%CWk}K4&RU`r3093F78YzJHeaIF^7+ zia>2hgec58vx*W`PwJT>CQxSZ68+}%l^j~Edv7|qc~NU9=J)RqYK;Mt_LgP)q1 zgHj_ZxZgs?k@8)3hg86Y}$MP2dq&J#6a5=+ND?T49;W9k*{i(xXxPrI|H)Eh->vA^I+q z#NF4-XuDKF_0`BiEtG?s>RZ?0TBVHkq>F9#RB#0X5<%5W?7D;$u_nYDrWqR_YjKM> zgp+ZDf^5{trNkLxp8Ona{lcp_G{Vz8%DOMq4|?tBaf*HhsY?(go$J)}znR67g}4)4 zJ4^3dSZB-HT~W(AuX9-3OtDrwB3h7=m1~;K;*5xr`6wn0S1th2Nl()srSWyo5>`N} zR#cku6xKL8YgtRi897EqR3vyCrVd&2rHn@vnFL>XN?BQL*tEe@nflin3^-+NohyY&tVYmv-pwGp!Sz`3 zwT!E`(x)?YMGHC9df#=Rm_ajW9k~xtr|CtGf%TR&Jo!U~f*yYPn*$UhBAvzPo~uqv 
zXYC7Hj|D_BEj$aN7XQc2hr0giVNQS7f#N`IsWj6XR+Z1T&e5pPKW{>BJ|X8>uup6l z7%WTSBrbtC*HDu_>gQvpg*uj1R&IPGt?+bdPn#{07vCS3=xC9UQzMXs7PngUM$G1e zeB~QnCybQc5L)F&o3z`R`5?9H$b^XeN8>NEwQSIQkv3DNoAPBFuyM%1*`r%B87wFJ z_l@w&vyN3W$#cJ>3c0sWUyRl@*s4v2KM&=8(vo6GQh@O6A&Z3toHuXICaYOTyD#v#o3&O`TDH2jR5}K)V z%Wb(E%w0>Y`qCQCy?bkId5#%ZE&7Cne0s-79)OzudOuBMb5qdw(wN zGoH@|y6G3A59Y2dY3@T5t*lDiNKTG7rMNJOBqw(iZ^S!6qk+i)D=IteBFrt^`lI)lLnS3@|^;hc8r|^m*PuqA7 z0R{N(N1kVG*zdJPShMQg$1WZA{Om|xH!Kq)a zD`g70i(7-=gZ{Y7bdu%x?igD0RFK1npHgGlU~!y+s$!eK@>9Y9AH=A<~n_T()pd#@MK=9$bSEyWew9$wcM)rM08E?JEGgJJW^{^LO0HgkRh$M z7bIsJv`4BtA4X$efKNw%h$WPdpp&EbI@mhY@GG8VVhpegKXb`2S_lT4y_6tCg4yDP zhHs9e$XYI+kV^@ZFI|#{`RFJZynJ-fybZrn;S*?i{qyN6ZmDus^Q$z64GMnFyYcH; zUWs_;(4L#vvv1zv#M52erQ_;2-$6mN-!3kR?XyJ~aSd$$Fu5=U!(cP0FjZQ2#r^24 zLo~KxGctp8zw#N~qq*y|W!S*-A|Yx{Oi_5y0b4p>fB8z>lfJ5VG#OVei|39|Mdh{ z;sugJ2s;JtF+Y)9-f(g;;r%UkKODyM{S~Z?@hPrAaOLKC1BDxpMe+yS4FlG!yQ3$> zq+eMnUv$*j zp|SL|ndnL-@L?N9^-}^1a)46xd<~anOmu+DN<>#WKKo3^bq~E3!NQ2SyOH?i_C7b# znGD#jN?466LU9LkTZyi0(5+yI0(!3CUag1@?a&n>6?r1Cy4Z*FLj+9WoN!n*|Ic5w z8Ka#R!&UIBz{mlT#Pm&4Sin6X=!^{gA_8Wk!2>y&jMapR-9zt_02e^Or=!?(>(-9o zUSrIFJnt#>`Pe>*-Mrg9G??rxVCpoJxWgSiSN#)c>49CYxi(15@eng{amm?_sMdQY zR=1&POU{?zcG}xlL0*{Njvrz2mREd2jfN@RmJ3i79Dg+R4IGWyR{iFtcxZ|)x}wud zTK9Fz+;#DDx!N<=zr>;humdSY=$x3C+f8o9^51Pv0Y9j0+_e)Oj1F~lHNp+=-M3x* zhTcOxSM-|}L{s#hCF+u2y;_2GcPL`upOT1XU1QLh{f5k`==r>ZlSxQe>7L;D1?xP_ z6xcsroDk_PtWSg*FlIb!nX6kx^|E7KB`05WASq0*{KmPSO}|F@(pUiV0{USisYAyBH|{F`%l>;{QPE$H44{d zLzq>DMm&WRv|*9;QG|*66378^K<1YRiUBs#j<`GTGHwp0+49j*;4z4w{M-3Ygg0f9n3=Q@4b308c0o_gEmn z_nS_St`&zeQ((8@`#qRqJTwe_v4z@FLv~D!4sQusiXyU8F@mVY_;QHYPYM>)je}J5 zRu#3xH^G3}1z1Ub0CIx9-t#7nafdplRNapmd2lW3cY!p9bT2aIC$%d-cT~SdT`mU_ z(TZI6?71$dC8ERNf`Z3!3xt^~;ixUs^q`s#L2ik`m{DDeq8^-+FA;C2SjGN&=`#PM$kZa+{qSt}PRc0~KpLcess4n{*PdQpGvXDq)QVen8@&>cF6R1mD#{MQTl9H+n~Scs|l7DcYCmBssX{ zj1D*SVd3PtdraTM;L|yjgiL}!eFJfwiUzX7@6|SS^n{R#h+xP<&Zac*is~G^>!WJ! 
zNhENjqSpW2IY&B2Xm0!}bAW%y^O^s%FMuaX0bceQpVuRp*EQt)&b21TvV9s8=TxnTnj12zg` z9+Em`ha)@FJc%V<#YmCQ``3PbfcemUj*(^&_Q7x&Dzi~YJS7W0i-O@V$;Dc=7T&_v zJCxpA1r2$Qpg$L+$JZY85V~PL*~FAXg4Vj2B)}-lgo}BN!8xXV(w+v`oh7qAm8+iJ zX}m}dF2&ixA{^%6!}FbfF6uosX@~NJ1@971K=QQ{^P)lQWp#4I;p^Vo>s1rrTlbb3 zXNVn7NI8p>#4`58Yo`M-L!dHor{{9-xEs}Hn*q9Ri*qb#k8{SXG`3AFbl8HK^LJfr z+t_wTUo-)W?wvg2HrNpYAfh;M+TMs_T0~NpijC z;?j;ht$!QmB9c=YzD!$~tk;}b*sEV4Se+*&om@zB6xp*kL$O0j);*TVSDs?TBiixl z69j2NRTNt{=1RD+zw)s+qB+g^VoDnl;(K&zuv(Dztl`WWg`15^=@S|RqdBI0FgM5X z;n(|GbQR9n6@?WtNC0$Jxn!x`Mm2zGnNAU_I&r=xBU|Wu8Q(8R*1L8Ud0ySyQ9_(f zT`bg}nav|I=3|G}q&GCL_z@|(hiL9I8pJlF;0NzT)`{zsBZ5%Mfu=*9k$R?AL_Ynq z5)bkbeyTJ3@cD0JQgx<*d~U-F9oFDO9SYaDD+%OPrsH?~7LNd8$Cf=CQzV<+-avb9eB-H1ZWa z_SUxw79xN*iP8C0j8%nFMTZ~|lbE{-ET=;p%GJm&GM@gbX(^>@RhG{MAQS-BkUJ`fZ#KI+R39CFzim>@I9baq-K9QJs= z;u0+s9z#{vC3T97?V>*{k`{Fctybq9!PP{cKDABT0QpZyvm7_#?70u8q)=oLufDSM zlXmoT{071;Zd-*A@nIP^kur|R(qNBa=@F#3tBp5)l4&c>*kkS^8@>T<;^g9 zKpsq(3kFdxhnHTTwSsyj_}evTJ7fen=44H7oSn<+b+{NmX($pNyJJ(6kEP9QzNMbv zhU;w>M(OaQ1?1~a{puYY<=oIX%8A;y5m9uGO*v6_OwJ-sJ>naom-Ooq3^<*uu@WSE zj^Tz8kl9)oXbK_x=2HfrCa#kBe;AhkQC7b!>Obz|C+>}I_RAMH=&tMQ;PB{&kZq+1 z2NcX24|Ue2t-KKPmvzhOg`*CyWic&1i7~oL_h=8E2UM}Ju@1MmsZNZKXV8$L!(z6! 
zrpMDGgQu%8f#1HK-a3r+dI@Scit|(+W`Mq6DR`3>hc-icSfkeM9UZyY92}m{C%4um zoi5kHuU%3gp_r^AlyUT#;{DVtj@GQ=)Qv1#FjuPp6wc;D8o)PCg;+TOCNB`ISQu_W z$Ar$HF@H2&H-jF;{UMFHKK4jptccJ(iQ1IzJ;qEkN=NSNj~0-v_L}ArWd+l%UaaIj zN1Hs}X(VGF+(WeVlGQ7h%FD9lPg;rvd>ExSKB#hkZyO;_uWaoMwLvGVKF`-s_wDX2 z2w_)LNKsTMVl;O8*n`3i|HGaR_NF9CFp50*NX0FSiuYAJ)aVemifrCbo_C_ojyX({`LRv9wAGj08i~`aFx^+d0DL^!g>NaGX z@uFwiOCbTpOwE5%`37%*ViSFS4Qv*iXM5&c2rMlNFJ+|IdgSy@1$uK=TkoL`KQMZ7 z#`TK8CbVj4hocl{3&eeAcZ7WYY>izj(5sbVOsa8>7dMX`v*+d%E#h?7PH=b z!8KN%9ZfJHB8Q(ql1Rc+ z2fTr*cQf=ZfAzD5ew(ZkOaTxV{E8Mx&%fnqO-A(`H#GEzcQactE{`(0%u zKX@=aG{I;*`r9p+ksoyh^Xu5~LW4;x0eF+{SdDJZiugMV)yFGP(MfB1h5Um?m!4ih zQ74Czbz(PNy$DrJla+R3dl>9}MIQL%7o22!YKy6tLBd(RcSXG$Hq$phqS&|CKO3Ci zTG7ZH(b*_L4D&PCD`Re0N0R zIFDYN9PdOT+k0AJ*c_|X4NCO>7V;ig}XwD;^Jb*=7|f~wT}whcgumoVQv{`0bN3#rN*q04!;HV zf}zYz>=Q1f76Z5N$jyS_2gp@2bKL0R5yK%RYczqTx509N;Dx1n*xB1x!$Us+^I%Yl zSnAfu*HdmfEtlBq9?Z^oVK3hd4S_h85{nXY0mxcwqf(dmT1sHh<3_9fAZ5RI`>M{E z84>;O8;-Hy``F~9pTS+f5Ox@B$V)Nqp~W~7t$Y#$exLj)cpg{=``=c%t}GGz5^(vM zHp9lve+f+`TJaKdjz3U&r_r&p&fck9vd5&iaGQ<=%?eC>7+JzLHh1!fcL+#uoA|c% zv;RD##-E4DWJn>bEI}gJkp^|CQ^3Af64T_Exs-Fd{JNtz3_c=_X~Vgz6K4Klb5ga^ zMLk+LN2FzT&&*@);_CFuxXQWhs;Ltmc{z1-^MA&jdF?`2g5j6KC=n3U8EvL z4!rB6{C@tJ|GYK_D1@_Mjv3N2ZI60d1Kg>COD+i1R)BJc%lMO1D1!yTD zDppUci|RdCV9(Hol=ksOu`IcnafiVEPm#S;MedjvBbSkZ{)MxJ`yrLUvOZCQl~J^!mO-41IeV`qL*I8Sy8v+dl}0XmceV-UQoA zRU+g7HNmCf4ykXzI#7%Ei^U)TN>tnw-tthO_sNV~PXCDz;qE3Mx|bm`%o54+i_r}f0+0Tfc0V`DPH}ib-e$b z{rJTrDTL!V4>iEJi?JB%L-d;?>W_p8$@gU3khr8dhLKDE7Ai(pJ~xY*!z$rBCer`b zWb&Ns9uL!(4Mvnie2dcGj8Qb2yO4e*@lz%-QGmkvR$?YGHumeq)`qdP(EB@HH-9V4 z)YBmemq4duyU(r$RFO|4t~bc4b~0C;iLkh6o%?SM;C%mz0Q-NH_Emuac)3_zW;g2n z*XUu!*RStg$d+XAzBVvKPje46-|m?Bye9UZKS$Y%>JPk8`r5mA??BlQya4iTMFqQT zh&2y=pkO}*QQn&qy()e&zH2iByDIxEafUCf+;%6GE#fj5H6*BI7I(yEs_ z!yvU@=CEwUOljM#65_C*3>df7rPjv)hb!%mCZh+?xRxcyO!R)3dcVhx;j82 zOcvJO6l)JEsj7~I&79Kf7^0NvqOX6t>p3R55s$?WG%p)4=y;8Ztuu=<}>|@ExEmB@TDeh*~R4Da9%vt`6nn&!luhQloj?HUp4JQgaXb0}{5!TI$618{RTg&@=M^EYp2glEkP3dO_ 
z)}X3XGudyK?p5@L=ywb|n}lrM9b11sQp1*W^cnP^&CR02x^*hb|FE+8yFv+C(JH!O z2({&*{*#>8-->Ao^*J)LsVZ7dnY4JM*LGN$ zTlM9|_#wHSS_@?fL{=@maIhKrx?(Q+wa$BIrms!O8Odz2`l5ad2yK!?XxVTqw{6Jh z8}Q1QNGjV|T@U}mt=36XC*rByD7gJ(nKn(H&bZ#0c`-cLiY${*xev+Z%Lv9-FQznZ zI(Cx|6E@I*6FfdCQQ?RC1+yIDSfW&e6o%9<90u^kVOn=0ZwFChg^j6`_U*ybjuPgE zKE1eu)ldEh?gxi_!`kr=ECq(`sz#A@ddgRPa`1}p%s>142au)k3xDwpI}clEChn~4 zNWqOm7Z-U%zYwdc|%btswQUy7!rgvQ3d9l*gB#pV@mtd_r+Dza@>k{r~sC4X? z%b1$7IvwkB;gD>w%lyMa?uh}M!sAlxHG6$2z6jl$=<+U#HZ=LlV_Wa%ZBek1*8}}C zNFN1Fma*)^{P-3vS06b>t0s`@k8*Dv*9)Vzr$gJpH`PaF67aH8TS(1(@rjLnObT1ThTI-g@XN)S~Dy2ltp5CQth~nv*L%p^^e! zrHz|2_fVM_H|J5ycs#>6B2dZ1rs#3rs;w86ir?JQi1SvrYi97DeW zU(UamRM0hf~{cC*GgWWWJ^4}5+(4#E66!88n?Wg7S{ME!*Dji%smRZ7^(h&ZgGV2kApmi{GN>sJc7E-dhDsA3OQ=+(9+)?i@~1&cTgTr>b7~7DMWFCL)C0 zz~WQ<{tq|fwHfM)Hc~8HoYg06EfQWBPxL$g)`ap*jEiN{VBBgrtt1XAsxqmW*KB|M zEvaEvA;gr~DZWB>CN*LG8JLpTP2CuhmgTDDMD1M?NZ-k_x#_7ACxuCZP8PBmFtU?e z4osnDW|~fROU84m(i&DF+|4p{0ihwFIpQ%BZD?!Z$yQZRO1-Q#`rW0d99Vq(&o`qM zQ%4(fDyTJVO|EkZ#D~}K>M`v38G$(2mzdTIecF9R%_fL*i=(3*=U?6ZEB&+1H_sgLq$Qqhfe;cktx1SVCs~CSz{|oONMHgYuwXVfBGQOd$g=SDL zHBxp4`3?dL_lpn_7^3yZy}Q>(isTv(vxFyuJWp&+-;a!$wiFa|Rw>sQz5{8C_&smn z>bmwrBC(B=$QJP_$~iVsT-;{kpI6&Ayrmq!8=wKls3df8ar++6mLlJRn-z|OZZk+= zHh5^U`eerk%W|)+gnm5LhOj_qmZij9Wgos0{A3eNv(~l=>)Q*?20_-}o<>X}^=gQ( zU+zom*Ugp-@x8f`RTC?5$PvD4T`aCv5j_q9f~4ZK502j&UzYReNNP_Oy5f_8eqN@f zTi#@leRO!APQSo*m9(8x!SL{@HX~eRw8>0q^t*sIEfIt1Bd5+fuf}+Ykhtj9`9>W361zqaK*;pLd>B3Ek0v@75aHL_4f)bxAb+v{1bif6O}#C^amuifvBWLDuh$A$zs1LuNsZk3GRqgGvbuOVDej zYwu%Ye_x_k@LPTt?{)3eM0bfURyv-53skn$uI{|}p=GdTXZTTWayy>&0YJ)t%lrR>a4v~-08nn9TF8%WB znK^?Bv5mCd=_>){5dpiDK>b+hyy`aHl(`u3&>cMSSGExCJd7cwgCB{tGNs z6vZG@ajHRZ+oEvi)+4;`m`lrfS!+UQnA}RZ%V&EQU4QM`{6eego;uLL?QOn4;fK|h zoE$GgFR#aHlXPCZorQ{P^Gtdf_4ri0tT9%FOW|yZ-=fegY^(_ z5w7Edh)>P;6WwETu*pR_Ozs7+0T)B`11+0`ao3zhj>>D{i<~aMBk@)*XL3&=T}oPg zA4^LgpVoS^rfo{6%ivwl$^n?J3Mf7{Zdm60vk z#%#MPDN+tkMXDHOhiN5QMCRUQKkztR*AXl(t}~1S-s#t)^yf%gH{7Ou2Y(w&9~=KP 
z+vrV~^1|?)*Dv;O&Y-%>jW-%!NV|Qqc1_)wBXQEDfN~y>$6x1&X-l1c@7QMMDDL4- zhb%phES#6eO&4xP#r(@hy9IBd=da)kt2>)M-W%d$d_6?f-!D{WP{e0i57c%`PF6b7 z`3UjV_nxkKKV{goqg?u69Y9k(5wKs$87IG>2&b$T6n%$|MUc07zGJP;9`(5nmoG4&cO z9J(!PF+KIcWhh~lOLwuAM@GMqY{CKJT#VQjy~s(he(~j;{h~4ZbGXF}OXjd5`1HBj z5Pnmi3_C&Z?X7sXX)(QRdgL6!N}9O3ae!7H{!z1}M~6#{hA8U-wV830A42358~RNH!^|@7>vn? z#gnUP;%mZ)LUni(Elk25L$0NCDa5K80;a z4q~)RmB=tg1i3ft(j@JWak)Bt*E6t;jUDg?-e2c)oE!T3X zU>q(#z`E}dlI=ezsHcFjhN33bv(MlK|<@1$@KGG>E$V7Zwm&8 z^CD-u=1<(!Qb$qdI@dXz)z26V)VUH_6vIE_7qTF<*<~V_)^~9K40kFJE_3iRBF@v=GNmg z9Pw8Cc-enmB_AQ{#-s$-M2=6%JipiE0Jlav+yuKU*FnZj^}c{|X-wImnn6y29_9@5 za>1ZGl66U0EaDco+A~2PtJ4ks2N$W9=6!v2P=b!*X}th9~vA% z$B$Pl{xt@t-}j>>n9k`)*SynMHoj>~sjpv0zKOt>8KvKUo(Z1}8Riupv&Gp8ZS%_= zIkcb!rLvpSP8x^}-iBG}NKJJ2*`(>~de`*Hx?3Fz>NgWcN{#ph&vwXdqM44Zr$|48 zf&`QeCy?TM)ss2>0u@Tde&&@subzG*4ZgC zM6$~9&b&shRb4&B4dUq9zFtvHaq+__c=&UXRd@xlgQQ`$6xZ74mp?j`a8vol#;ML1 zkn+8dKh9WI;~_eclSmGFAtevrOX8d zzfjYN-mV_IW42_E=qqV~5y7Zflz+^R4P7HA8KqpOY<F}6?=5Hc(S%}aknaM*zn@5=RQKoRk)!7!5Va43+loj@tvAcSz@OyF39MzeaGNy zyn>NXISieXp}G)7htx$jXtRT>pBqYRRgL|Q06K@1Yh>vHZO{ql!<&gm@@54HL;n>2 zzg%l)D{OpRbpFQw7#;d!Z~eCXjYtEtOa|XrrxKEyyer#ef{cD<)^YD)eUMc&V>^pk)gXX5tOT>;`K^XeH?yzam}kz zNzyM~jtYew74_NHikn75qb$PVhy`sd=dO^$4$MX??iu>GrXH zp{ssPdI9!4@?3PzA2YIk@RY1)2_3j^b`+5UH80##fZKDZ($O|wZ#=?zs1}@lg*CRN z94ZiyM)>7Ar|m^zQ%ZTC=+o&-^5r|a19#FaOEW_06n1n!msWvakFqJRM-jJcS!q2&FbBp_AdSM~qU#%JcwW9}F;8SkJw3-f?VwT`T~?_~Q(pt8DwPC* z&tvpct(pV-4WT1+PN-!Dqr={C5y?k%!Wunz)Lb;=Vv%ApB|H&nLgw9OXyw{;n&w)q zzdyq-HdAK9JF{@`8l(B?=rDl%kIg;A@v)jg5NUVxVd;>pQyf{qa_(nykw>9tzLfM8 zBQU)^O9R8_8J{a6#hXO=n;vL;`&RK2Yu{W>JGta^!vz$6&(EG>VQ;N_e#IIzXvT;Z z+8Nv)T({4zOKx<%7XNobj$gHyDq!C?TjGqPkwRV7ace5^cyyU-U#T8B_taNU19Oi*@@*`&*l{0LPfTLVRB)cPW*NFBR)Z-|V8N6Kl76pS!sb0|K`n z3%50#FJnDub1a2~_dJE?neF4H=W=bOVDagF-j=e`;Lti*s}T0ehl*DWiP?DTNb!Lg#J)L z3(%Xb;^%7Xt2>u5-=DT5MysApt@Cx-B8(_CFZ+!+rhQ&b=GUITNUYY1O#Gh+?*hc5 zhKpsDr4ic4iN!%ntYG|*B_WhiYuD_vvC15`swVFVg5zY|w>~)mcIt@&<|q9f5j~wq 
zxdF2>$ewKxhz8=G!MkJos>xDT((qPKJl=+-Pcx-MG54C`R!-xjse6W>nj$~7u3x+# zJ{xo0?&a!)cQIm#xn%As*<6ucxv*Si-saEm>STl?T;dEVQ3%6RR;VBSejIJ}_gg~M z0{wMGv?1TEbA)=ZOxxH$B#6I3cRfy7!RvKsMvioynN%yi{|CUl-RxfibFocmjbJ)$ zJYz@UG4{4G`l`6Ege+XDQmqr8 zzVCxBuo#<9Jh(V}SjP(9rkM38F|j95m7oPz0%I~cAM0?)J02vWsa@wH7^`FY50?96 z_h)+6n(s-ya4CZVJP@RKd-n%vhZbBbr4!tKjr)Stp_F~VfKR{hZ=O3&=ubllcNTGx zG0zhnqvr1bjkxVX5SPh+=DBk?{)6WpGi|Cslso}5Z-<9S1bewSOKRsAqtLda^e6f0 zKerE}8zlW-9XNd&R!MLUB;w8(rrgpne!YHJP3x0o%x3(M*Vh2fsXVdH2^B515lt0! z)|q&|ArH2dayU6z6dw{P(C(kxp%@ygHT&{su&-q8xn}TwyF=kPlA)QX0K)FPRZdbZ z-{+t&4R^x5M6_q#Ya$Qv_-S|xT01bwNrx&?zf84zyoOz0w?ghOv!|PIS$Tfv@*Q*h zLGjV`G1M*2`iHy*eBmQ+>fo>qG=-D7pBS04+O%HBXz?9CbA}!Mo8mRiA{P68xVtKj zhz+&>cb@yg0I_VL(-Jc*Lu4o^$Kvx=9{O-wS`mN71#*RvXa`MADbeI@Y%4sx>9DFJ zANuvsdkVh3tWs%~2T2&s)zTq{Qcu_Z8E?2X!KDZ;F*v$|WBL*Ilv5KS?NAy=;l}`U zGRS}{3CUrQm^t^@h?muH`HD5RI;Zw5s$VlN;A_)ABh1#Pga#6q_5yx7~D?c$WWX}gh9t|4(4{%v&RNfzg`*wR2 z|N3n!<&FcBOK1NpA(2g499^%xLa&ycHe1nKS?RiS&mbRSCgk)TZsZm&Zb?cEu4c*U zDy#KU-QjCT@H<3~BL6t?j)MB--TIm}P_i*nnBFsRZ6%A+gddxqpFYLf|D^N#(}cS)%Z!If*^-D+nzCL3?H!ExUu%p0x^4%G{<|JcAiG)7b> z2VTqg(-CpiN)QmU`6H-Gc|NshOEzU1Ef2=mJ)ZG0eklBJ?6zI8gv>H|^FOoO(zXA_ zZiCJ$n@Js#xBdpVns(-eZzIK(^G>42=|u_!wW_a_!G|Jx_${rwV`i11U3Hu6V|0|F zHH=`Njg)(z@Of@7J}!WG#GT~;Pa7CMLrIdm=0w!t zRzoVsH`<6Drc4{w2j4w`|%YL$VV3*jZ zB&6wkG;{?k8tE?lY;e2$k}8i`$Yq|i0^zp=uLv3Ygkst7$w52cndws6n5{uPzxuO8 z&;*iTNY2ssxP+ds&O)g9Q18s8P1vxL0hEr3P^3e=yoJq;5>2mzC{eAz6q&+c7W8>iz}5 zR6EY(n~>E(GqK_z=KrGYJmZ?$*EVg(jtB$=0Rg24M4D0s5{fkG5SsK30--k%si8{m z5=!U@gx+kSx6nHhTIkX{%)(vHyWg|lnR8~&_{pCiWSK0puK#u4&y)7t0o>+W&|n}( zw0ZsVz0=FdJbEwc5H6kWwb{t{c~i|APG*dAgHe^F4cC2neNU$=WyaH)&O^}pw8GLV zEw|3+!v(t*2`Lr>T=5px&WxVSUtL^;gU=}o^LaREu9ps<91i!pALNi~K#A_=U=9Uu z5e?1@H#u?^%DqLUEv3t|IjIX zjL`r85i`xGz)Ni3yJ<2j>)g4AQW@KAC52zs(0Uq64gUD=6*)}6vW&AaF#tMo-Uo9% zus@9U$=X>*!5u6t>8*4H7`fWXZlo~O~TqHbM;^XxV zjFO0$5y-Q=JFScIv6#6a-Rzg8Q7ObL#;hTxy)O@BTIxKE}qh;Nl`U+#_uWyUWg zIa{&Yd2>x{5e={Ds8J>SR=-I2`}zfk?myQrXw4P!1^J|vT^v}qUr`J1afhk9_s5Ti zmwT^qqLc~-w^P`#Br 
z-5T9>t0)7e=t4=b^#J-tUZmStfAJ8?FY{NxElHoXvErSv53mYt?T#e z)5ykAvf$g!Er*Kecf8r%S8Rd zKQV%M_=Hd4{IRK~yDLY;=+vJYo2=!R#LA1e7q?*`WdL663cz@!dpi9k;)Xa#78kkx2n3a7sS0-$v=jNBfnnLF$R{?{wmets zNuI3^0GUM{<|C;hFt{v_t(vM1Xws>DZF60#RH^r1ORLnPr!8G<{0`k~_nR`@xe*uQUi;lTdKJ5a>&gfd99!Yq7q^DXDn-tiu@7F(~GL(6H zPR+jk4@9mm{X0Y+;K=Ju?3mOoV0vcRR#w$4ny*dgPoO_L?=*>L2d$ zikdLARf-0Z$(w|1<*+l_yJ!#!b{9qcRaKX^0IJ%`KBW)I)2FICxzbx}V4j&&mx6sD z%YTLCZ6yQbp}gZl_#SP03Y2g|q{%SQh<+#FJf(NsjxloV^L>Wza}T9z!`RMp#9|k{7Y_u@E=Ip|}Uyv}Mmk(AuCTwiHH^CC5*K+^(t?6lD^F zwA6MA`+M19Yzk=SSD^lI#1}C!b5JM6M6bWrI}2hv)pjxX3s7$c=rFL5G*1YAWcmxe zu^+T01ihmc&n)%c)k9Td?GzZD8+Iv&55(z~0UFkk(9h6f!ViiX zT{Q!fKH0tMynWpdkmFLhN{*SBA0zLRNbQyL3g_Z9Q~cvBCWMyP_#|vIpdJZdp-9o^ z#oA%Iy7L1{SuYWdb`u}S>hRUP~?TYVx#ZsSW7C9aIm=n9I<`=@EshGeel)E!O6sD+cM*};# ziLX-XU#+H*K`b+_K~Kf3C4dwBQ~y$@3{Y1>GM>PdVI&u0-4TDD;#_j_LVSD!xoUfG zlSxMIg5ER*$B|ZMqvVbeLS(#?XhSMBJ2l8OArU(m-R79jv=QCwI`ZzaY?eTptP$-$d!Wn|^$lBN}B*J=+bw5~Nn$gJGlR}8q1hBQ1 zl=0}9V0cEH9e9=#c{SB6_?^10E)il7pbnZ?k-U>DIZ`RF)G9%nF6`Cx2k&>+kkA2| zLky9>Rz57hAKd`dZF()H8;HNGErO2-=%T2nKgjxqHC>RciuJT;j}9l@UP$`p>supa zg1Y|&a+vevzmPx8pyTB=5gA3P@ek4A)#_I7I)kE<>y6+TUI)iTpvw{LZ=hcUQ(ZW(?V@} z7HHV4riXU4wb$R@`qbrHQm0ivQu3J&zf0bm43)gbFwP4DZM}%{#Q6BUp&)%vnLE_6 zg%~H`#~nG4;_mm|S{e8Hc|LwjlcO8iqKyCzE)kwOT-Lr8c5-McPitEIHrPdiqYvpk z^myZ&mk3NIxhH*x@XdvA;mYRJU^!iks-e-S4=_0iDt)*6Gx}SRL~uk#2kGRgxd5T5 zZSp;(oVm8pb=__=;wybMar5m>2<8MAMRXZ|x?#?aik-Pl>MMD^{J>u;+-}loB!j8(@Kt-jOp5AO=nh*HX&NrQjtJ9S? 
z|6>OGg-na^s|wa7ayZp95|jzB9F^$La|MlD7jPxO_Bf!weg53Kq(|ur9bmM@Gk%Ic zKXn2cHaB>$t3$U&fD^^^07CJR(Z%ii-cT0}&KS}M(Bl)+o)(q|&n&^z9BQ*s+aP0B z5k$&_A07joCOkm}en4xcS~VQR{uh1ik;R}3k0U?*y}li2g#kWl+P5mVY00GidwtvY zkMo1%{C}-)7ysS)!ED_*tkfpfS>XpoUzUAh>rSDId@5dqn)VTxYwyyUfOQ~U(bSG5X z+m9H>OcCUf&vK zIL%GW&meE;qjeb-NU>&^4duW(W%amsl_rWN5{0&o#%l*3i*d1HB(@BvV?s(HO|)da z>^pLAc*`H$&eMxXbf?s2UmIg^1;M_Nra>{l)%@P%?uudC4B9@j$zhNy+oO*@)&48* zmz^&i3@X!+47s)-(3xrJLaP=i7u2v1k)KT{aB$sxiYOMJs3%F*YN~67v63SXjV?x) zbTEE*66e}>99mHLjtgJDW6wUJm&QGVB^qXw&$tUx{6>m8liRIP|BBqEuh1{RnxjcE znV`JQ83e2!-dpujs4z>zzwDzwMv46K37YrR}bK3}G8x;~Phb$mYqI%r7jWHdWFiu|#= z?_4;X10>jm!`utA*fi?^qEDai!Rx?6y@ixQS&%G z)TBlfdErfPAqtX-xgAYgFjm0U5-!&dI@cDf(Vt z;O#=a;^adbgDcWIV3B|r7RW`5aRw8ED7b(Dc|SRb-DtQdQt z0Amuo!11wym9{L~zT;`tGoY6y15x7`c$;|JupVUe68nsv0iQIusqJa;q35}HpGJWw z4-9R5c}LM&>!oxH3h$xDC50Hclw4bp!+`(JxA5`{g6jFj`N^_pbxIX_jc+P`maH0I z{;J3m3NnrbZFBkBLqtZopLkBb%H_<7-m#!_-JlH$?aqU{6J$cCbFQAXQA%w+EK`c> zL=gmiYa-_B9GVw=*ep^Ax~}ARteTYG2MXq{>pL{{K*5{}$)k{+d(?8G6|G59u~c3k zN~t9@3SJ1P;?5L6TR%W$1$Kq-^Js2@R$wI68xLwdPI{mARuw4`H|)#jh5l$TkEkZU zz+Xv9y((uubzzNsVx)KAjUT?Y#&6*egvK%@ruFO37L(cE8UV@ z7}U}+--2@>Xn&VZ(Hj=Yd=0v7Z-V8umOCZEC)|aubT#h@wz(oE06_ezWlp3kKtt)t z&?XYX`|V(8G{F!I>R*KVFp-_s(Iw2Y(@mqz#v3d+wqkXT7)&^7VD~W$hhaqQLe$Xis#Bf(+i;fa9TK4RFyXeq4Q^)m%vC@$s zAzk`T)Qw*ECO3}C(R_g2V*_aByzehY=qQ8*;g8TCms&k)`ULqN4hqKd1f*f&xF2a1 zD0vqX>XUb%l8?EPx8ms%8EMtT1Y(Jf%q(gTQ<55%neB-C+OJ-8Q|_4<|EjMxcte^1)g)NGt2Ay zi_Q$F+RnAjE@Mm1iYuvFvLv91@5@a=c0sY)$((m@uIh9og7{hvKa@)mlfY{=Y*+`s z2K|V%OxP;MT}rM2?zX3S8|ZsP7GA0Om>a6D z>+r5(0xU4EQT7VIw&m{s9uyL(lL<}#fFdLu_k089NFFVOM!c!D!x_KHiuGN8BmTLr znL1^&B`9-{{(LN`0Y4mhDgvgjGy(Pp*X`MKuV-F7$gz=EGPNa`^m;fBBSB~@vY*(S za;$|4Eo#3fe(U^<*Xw4%9oM ziMxcBJ7s^83f>*vL2QYej0`-NE6y%s0@XBobt40x77A`g-dr*WUeMw+0BRsY>{O(B zI(R>#x6AAs{*e6HdK~XYTR|ZAhU_LeB2Otp+oJm73e%F>%{(U5{fqKq6=312+U(-W zi(Osk15flR^RLzm3oKK8Rsk3p(;?QL?QJBSE?VW5eD9Kz%kD z`QpF~;R*wq=p2u~SlqhuH1Tx@(a$}P6R)Q*#CD{WuXGJ_5SbZ 
zVUKd{7MtGr>VCw0RzKXp)Fxv%45~m>3%5Z9a)?!W(it=lXt+$BQdfOvrq%z3#%1;NqkzARm3@;B(CC_E; zf#u^V=4Uy{X1_Ge89NNua*CSe-u^WAnb|ZJ^8#x`cIA4R*|Dw3vjixMz;)aJgzH%QD z;ILE|S&L#_(!?D}iVqUWpiG7J4MgEl1b%O~4;R^cOI+0)8!U$SVL@lA_uXs6`N7IH z!)#C>8~V~grR95tP^^z=fwGOvbgblA$r}d%bW7IcXnT8b7&yebv9H_95+m_ zd6oDk+%)qi$|9G4JH~f!dX`&g>n0x_d(acp{c=*v_u&6r< zqGp4Nr9!dFwY`mVTQ8ilrV-|&J3X3FESNG+BX)`$*zGRAe!^$xSIey^GogAM`|-I3 zLMw{^x?s3RV2+I*wq&P@EV@kjB|-VHHy`RaT!4AEIPjvbE`e^)QhMfHS%;g@NDorq zY(h65czpS>%!nfvtIgPy2-SL_7L#lCT|&|e!48UssIgg^XnW2qd_f=1LxcPg&^(`q z@EY1y@NS2tnSToQ!&XN0E1y7N)9{^`nXtKO zm;u_>#$d8a@Y`Ry$adGiTs^s%@^EA+`!HahX0T<292u+m=y4xYS4!leP1_9a4%S-w zLXHIT^ll(uJZ2iVIe_ZBkgF{X203khJdT-7ds>Fg7sNBqEm8qz0=hy_`65w@@~?Z?K=Y7S zl4GIs=sEJECZd+GR-V*A_$4B=Jxek>l;`Hnd%m%ZyJIc5c-w6P=Hdp=II0Cik5UQ< zOuirNQDSEMjw7YI&NhrAB2oR0_P6@-@-!+?buM!Ygv~%g?{fHhC3;WBbT-Wd2&D|F zqaIP_f_v&nHYBtAO5cHmS<^8@g%~B>bg0CQs-(jhi>VTrvMirv*`!#?{a6sgIIJDZ zB(q=V6%3+`mo03Nx0^5)ASV@*YD^8pGL=nMO6c5&#`dt$3Bp*jjg}#zXC)a2izH)K zU6B1O*{_n!_IEgKEO-Bptc&qQn9PT+o!LJX7i{!v(-qAl2ojo-dIBj2l4}p>S@O}H zOPwi+Br3kvzms^L=inaHvk&kJA#!K@OSv`3oh>aR17HFa~`Fq`b ze&@shjc>NJ4mU3+Qj!YFJWf?1=>Q+I$-=no{4^@DJveA%-*(=86S54DN})>xM~Zgs zmmw34nSL11+i4#}QrSIcKcFKkNUT>ik06vjqD826Q4$7KA2&_}bedaLT%l1kiwLvH zc0aNM*6Q|#+N5`X%|$adz1 z_qsJf5wN~rEgPWN_So*e#b;T9r)Q$y^g$#symRUGdIddGvH>KypV=@SYN7g?eYP1wm@G5H z2;1qyW&9$a4KT=;`y?io04@neR}Mj@j>kj%sd~bCkaVT-l~)?qC|){TK=*iBLF9EH zSRz-W3|T~@644VD@Px_q!H|G~n=hze|Ko!kOq1oco7MR)jsg^)cxqBRjI%)t3z$*= zy^%3t=YSDNTJ8qJp}mT=!dT(x0iWZWeangry19X0J$ z=gz)-o6LN@oUBMy`^*n>w50YeZ_2SCLMMi@H{MRSY*x%Ho_%YI{`D}p8Z2oUM);?nAY^mMwmfrU6evUpjGCh^`gLl!L$&HIc<@XL!HY@FtW1(m z-~q2vnM}0w0|#aUzKAvflQiG5+o_lWqoEE4e6Udl=#*TI_lRvuL{K(kx41`N-&;%S zUq3PwrRMpbw^*O_dzW$c5z7RCC8v`@pjmnfEge+pr}E%LA=)&F*y54@sm%<-<$v=? 
zf%HtZQPse%?H06s2KkxQPuJZWvr@fy_Ykwe)Ajl0wq#MZ1QhNG>vou}*IuNe3`Yb@ zQ+)5%ZzsQ7IFR#sh=3zxuD8=OLGGYELY5?FaKC;@w9fT^d!&FvR}1OO2z|fjpk_qB zq`N0er9RP(^}MPjTe>%geGz589a;>QH}@R7L@8m+nbyFx7)4j%(*n!KDZtm{%ak?)6m4anX!joVXM~YZFYcw_YPdm8LvGP7A(;O1#+94&DRHFtOjQUR?|7Wh zz~F{F#0R5ty|m{?PJ}cNban9O94}MZz@QW?EFai7G}Cwb{SsvjsIA{p{F`%kfByyW zXM{d>QXS)lHKx}J}6ru8Ab(;qU=XG69fPGWO9U{ z2j0{zW-xiQfQK`F$HyXYVayR6^gdl?W~bx{Rp(WR99E4Xe{X4|?#lVtw0huNqML`Y zIp}^gut}3L2!yELdJh`b%`J7!dj%7LPvDO4vA6N4dO_JV=!9+N#f5z`?H;81@(0)T zR565$)0V;-ZPK|YvSW*?qGpumDb8@p#B@DTh%5d}z=l#6Z_x|S1$Fo{c|E%4rSZ*0 zrD z0Q1+{?pt){7NG0R7!57|qM2At;``W8x?8d7>4W6&gjb2R;$;yDt1sMI4@ke6x0*G6 z?(Mc}1Byiqc3)t-I9TY%FJb!WDYk&=f7>-D1aFd7&>}c@t~)@5H`D5u|F5dtKknwx z0?rfvpUDl8!yC&3NNcmtLASWdc-ed-X#q*B2_5)dMH^BoWLOEri%Q?!Ik)$?5r?C=0N7Pxp#jG2Ze%kuURp*U`|Y z=k=P)qreHGQ|uk?ur>-<=Q0$UI0u>)>m4&lda_USv_lsoWgoj&zT0PI%VnJ4p(2Fz zDeUzHO{eQ12MGqo2=)1`%A*%c6cMBM_${o~O!6O8Jm2f7M|QEF492A05|9nDaIf)} z#Q!vZ&u365=EuQPfH=PFxBd?AH+C3rihY(57I533bb_s5&i68MNi?278)XmHSyRUN zl@(wZP?*cjqTmEuU|`cj7rSO*wz=n#(!PcBM0OlrC`}9u-RRLnihSFXgmWysa$wrt zBCM}E498b#A~gATh-RJOTHL|cXysLYGzubDNEH!%{HG1xmj@TL5K%S4L&XiTuFL~y z5UEBsbS@95v#TjgDXf<^vm0lS4eoaHOl3xJ&J;|oCT96>N6MCT;rYRW~IeQfs@@@F6~ez67tKy1a;TzJeHd&Jskq=VH{dc9Ulc`Wa1^O?2HxY7u5c zioJ%U)S@_^Nm((zn6NHaD-(IyV~afF+{u3UJ@4CNRf*oW+-0OK@9;auhJ)fZ?=&3e zs-|Hol#v$g#4DsQ%TKEl%qY-yEr!Ze!7QTj_k)*0<^xl--s{hdwUDZLIZ#(HBF}78 zS6C)&X5Z_TJo#CLTU^K>)e@HUI$!Ql^nK!-*vnY%WeW?6MZL7V+z;(aoQ@=_=i+Zu z8>}-n)7g8`X(3RG518o}53hi$<@`aBXnb^6XPkol`N|WaGK3ufqIAlQsmx@FLpEb{ zMawad%9;o-NOj4aWn zP&K6ep2bEQIeW6B`FibGCB6>_p$+iN_rd!qqr?zWu%vKIEt6}`7Vp9{CULI$p;HYg zwwCPVFKh`{bV`K*i=rDegjOu!6h2KZ z4eT#6==J9rfS!nE5BE$@_#)htLkh82V!Q4#it+N_ysI*RsSqeqd_I)!o*}u1^%kFoPQ%%u5pFyN6i}m@M9f*563_^-t~VCFvx(?0eF>wLZxk zwRlw6gDgu83kYQ76D{glrv&&3Mf)09FVfXmuzc+o@1=J9gZHR@@g9RQXR)ssL``n& zX6JjexFX9sfZcM?K2rW5T6b1sdChP4Nrg70CXT&*2}UK&A_!d4geM;;^i~m$PuPac zWHdeg^-jkQ;xc5tDBTbE9qOq(k}@!ffP!vY*fs0Lt=i5dYlVOaIkPtV+UyP7j;dnn#$*2_0YGtpJ5x@L6;`^G|@l!^sRUe*RaDb=T+ 
zU+Bg)T*AV<+<)ZiZq<(h*c`mUct95o%1}Vi+V33{{&p{9_?Xs!{NpsPQk;{GlAn*6 zCL4DUYVgvg=PCAJe3aFejO(Nth7eV~oT^Ijwc>xoD8%}5^ma}Uh%i;5!tB&!TsLQx zkO>Tu4Zw8Dre5${gwS94NQv^#^m;Dqny=Sk!ND=})M#*$#Gy4}^ot=_bsc!Y#mx3^ z$ACgS?~|c`aXyP;-(s>)=evUgG2k)grNuo1xG^U>0vdv$gy!)n6S?ar>L_?^WF4cC z<8IsHa<-bSw&&&-ETTyn@MC54_$ZBN6x6fg*rNm_;Tp(bd2yj%f9ie###~55;&F7r zH9kjhOx~|WApQOCQqR>gg2cj9w00SgW}oKD?)`>1vCD*>C#H&~C~dE!97_4!2X1#{ zsq7SPd^nNoa*_m|Bnv7qE@qd5pWQ12gLiwY0G)qLA|ADBq$iL>^oD(7K1jRkM?LM{ z0Ad8IDqb;)TFkQC9?MggfPJ7%-=FJ~4Ia8}tvap~h?C@!Va~Wj&Vy0NNOQ$b&3~U{AHdgspG z+Wtep0{k`k(A?YTLZqtOH*W@AH@0)<3`widwxne{cPURt?168{D2VFnA^`Txos)d> z0!;|JbT0oxc{b`Y;98aL)((6=yFQW2$5HNbENh(;focv=oUm;~>vJn1l=rx98A z&}psPkh5%(wMT5EnXPij#T{LZpDV&;R7ty!x-FJJBj@a>XQsxA3@(UTtt+r&mo73< zY)g|BY1%PnmEkR~3ReKG_B7b70f95HFXdbKbU(W2aF3v8@5{LCoVQHvk5RMI0_E6f z*jVF`hJ~B3!E9y6mskz#w`9ild(GWfQs^<<_2Vxd!JWB9lQ{-yzTHb4z?@w8<}3mi zTRoORn?C4~fcO_{*u<(Lgs8C&Y3%@IRgDxnJy!LmTE5LF zm_W^xnN`$I;+|gU;sOgj2eZ8Z{U$~=^M`10`0Dat%O8J2(o0PDN4+;`S~7gf+Ajr< zeQZ{R9=Wa#;08iHAp4Y*YPSsDH7k);+J>3B`S#Incb?k3{^G!=Mlrie=(xO=nG%%2 z;~*1RW!sflBkI`{Nlx3Dc?0ZS&-6h`JQg%vH&bx5GK0O_dL7JM@(R(eMhJdagbVN| z=myh%*f|u8$78ky(X!5FGdEo)hohXZq0WdGo&e2bxyd&xXbuwXWw;86)6%(ul|KXK4wn-MH}Q`6DnaZ|7l! 
z$qe<4=@N~*ZWLLH_0Kg%+j_H=){+k8hN~xll93ai1bp#e}58cMrQj6F_ut zEXegH->dmuqt{Nhq4*<^NOb4M@#7vlw-1D*t_v|MwX^8GV9CkCFh(Gl&7bE>&Rg+W z+`pqzJ*sR2bLf&Tts0G!hFv-u@2W0q-j&V9*=VF;LZb4}Lq$)KvTn+gu6)cJrGBd%KwXjd?jVnD72Mp7aauJ!dir1Fs+IN_{_cXAAGjiyrmi> zcC?cR^94;R1{QM2qtzsHzf`;BCBgda3Ee)FRVr7AC3FCbFFa^q8QH+vHn-jxb@_hXI%Kn0SbF3@N}0KSU1ItNz+Hx+jJwB zwp7g5v$zs#LRic>B<7#Yjv7XS>NfVtIL+D44NjsQ6+$J__20MdDb4B=ABMQiM2~MM zjzs&dJw&U&Y8wiC52_-NHkx1OGT1MPV`;cEXviUdk+^q3Z$R0n=Ca^RGT4G%}K+@p8+CcXe`Nr!_V*o+9gL9cFqeTHYm2md?`vy!`~I0xci zC}yVWVgu)P3)=+gFdxhE$py6(nU9_k-q`V4PX?6EB+i!t4AwD}VTf)Tz6^}ZlvFm~ z>tdUDyf8{1K$ zCCXf^WqUD_S@t&aCb6J?BS2`EBTjG6KhDiZ*tqCq+I^Bf+@yUPG$AkV$FEf= z7VF?oe+N|IAfkTRMol^zG|k7Y0f?>T=;A1D5sAsmoxN%UI7&g0b>1Sb#5@`xpM5YP zltqt!lX)`$_xuA)%M$tXWVKO@kf{c(UeJ1^dFfA3L<4K+%2Ej-zd$h!b3> zmkhBRL_(laRKZm6_0;{b#gorpeXM{za6Yx&sWze63uFG)Yn9@5g;O6u+Nh#z3^0>t zc2d!DBt2r;>DRn4CeJ=0^gWjN^b10>GSDX9OK~^@ZMtO^!ex>NVz4?`y5qR^yY+*T zP{#JI^=4tlr`!3f$iY;`GARG}c~hX$*phYeKi1LXU8gxmumZrfCm)3!9AS2C~m}~CboxbuvM9OZ2W~( zpr5)@+YI}RLAB}T>7=A8!M}x^9c=Ym@~*bQ?Dp#gZQtq30pkX*QX1FtKg<4^1l06$ zSHeH;ePXoY&v>aRAn<)0eOkI&q`d1N1wzojk4_M3;r_ufdJa9k^=XS8)skS0|524X zSO+)d#XhIYp*b20-38VNchsnmmukRS(!CjT-1@Wck8WhpkJ95G{p+QH_8NpkZKQgq z?s4h;k!EBgE)w6GFC|enf34Nw!j`NJWlx}+eIvS7se`( zUZxJ*zS$Wi+#*ed7(4Dn>=P>ZS(Csbg(GZoexa~(b=M@Hv{#Pc`SVlRHPsw`og3O* zNf9g^|I{gPJYFx}L%D?t2G9eGATLXKq2~D?{ZHbKaQBzEbC!z`euRr;k1)6-bx8|$ zR4$=ZI8XPqC^6OJ2TR}~=f;w??m^bBC2TC4?6f5cBIxQC7YQ7 ztD{uiURN5Xmj58ziAdP_VM!2<3H=GT(RYfQp-9c%>9MDF{zrqHrj_=M-zDCc7p|auK_Kxz zs`n~^A}&8K2npp!S35v#C;7X;LwZe6-4#7@h`M6s0 zGa}}kV_Uac%NZ?MH7cEH_SK_3r81aDQB8aZv^*BEM0J!aYO9*tQW%&NtQ-eNPxBDy zLOy@XWNaUbX@Mo(RKVZz5(JDNuKNPOusJ^ZOJYTQijTYO`g(V_Y;|G}gTDH-q(0>- zkjJw>2_}Q^&P;Sc5S_$?3+BjgL-`fbMufH(eJ9$GPM;tv)CcmW>n#gc=vc~38-XFc zZ4W4wNQKMMNp<@cvQPgBB(@sEo@BaN|9ROMnf~&3wO2*@d#<0LEI;16trv8&eEtDG z4Vyuj!%Nut(J@?i&eraQ@z( ztLPUydMuNa5UT=^0n>>6Mc^7#lmqH%!%{~D>Thl%yK#|EnXmm^IfGk9(=)iu0J(W_ z!W)X!IuGVlndNd(s|yGS?G|?@G+oN+nbSz{GT*$S)dx^A1FQ;q5&_7=xVgNFO4cC% 
zK)FD!8BS#6DK}WPiENpF6wx$eAtLdKzW;>FB0}5IqWEKVfBX#{Hi}yVJcAyd4G*lY z{j?>McQ7`$&5xwz$L!pq%@ncDt-OxBC5_mfZG4EeR1|cOKeh4w;2YCo)56G_l7+8U3IW?&KP}NMLzU4XDSrg)N1hh> zM!`+k_#y4~Oo?QO`D>?N7)z?)6E$`69yCAG2!I2;UkNtW<}WKwF`^g(Ap^j+0N;b$ zgh(T5d9&9JB}gJ*4`LplM1*X(z7#QO0P!jo_UJdX#__Buj4v%@OXh8eFc!%bPdF^k zSNRcd9I5ulpDe)glebEwIch+O@uG9@eoN4qDe6S1>#i`p^%8nyJcstEOpF&n-)nqT z+-U&Q_leRI6GfP_NSI?)Rn<*+wPo;WH&@eh~rZT($5a4PuWBca=cdpY!X~5d| z<<0*g9k)aFQ-i$Er~kY=o5HUlz{$q`wddqO&-GE)sgG;V*@k4Ujr)g&y|_yT4zy(L zagv`{0}AzYHAi0&-q2QwHsnbtPXBbF**OE0quR(LU^A}}Zen~HT)n6dJLYXk zJQrn6T(-?+FrS=ODQv5CX62?rY0vNM+J#w4g^;cMLf5*5e?r$;>cQUN**5*cBVf{v zmO@s>Bwa)4mNj4aWmvFnN6W*C4++<;{A2o&c_~)+95Rf^QZ0$?J;Xy|<@n2-b*#1e zVCYm+Gh13Bi12{OmX{X7oXU8Dq)z!D5uQY%WET8e*3FOmE3VGJJHu6IbO?a1hUxbY z^G{mtTVQ@A$Y+subrYZ^)k8qq+exL;bKr&HpWWpbsyLjJoG30r2=a-^#u;u0xO@=L zrHMVQ1ocyY>eQxt9MYDm7U?qtZo7^lw_S1Pf@%v)mubVN)Dy zw;yg-l|pBFmNz%N1AcrIh}YG8cI$%=L~65&eW~-+L)NHQcT~M6`xSGb^MSVUg)@;V zdzX^yo-&rwPAavV^G#=6cc7mHC+k~DEvJ$ZoUpX``;k& zWf!b7ejv@@Krj1M{l_>~(`|&jGdtx`>Xiv~kZ!0Jtg4O5P-`9wN%zTLD@{ymu% zD^d$-L?q9#g8)goLXiYDgV}l(eyU07=+;3XY)r7THZl*3(`$tDB>bwPo=ufa*8Vb< zjllR=vi{IxKwHMouoaR(Y8C-is6&)audgQ)#PH)%KE{gz@?@M`)wf9zSb%2{t9evW z{tjOPF;3HkC;ThY@)to))EVekNz)7MJeJwSWtzs;u8sJM>aUJ5G5u86Kli*?rGV2b z)YaXuOnvkf7v0x7S!~n>yAbQ5%`dEUm+Q;|N)@_IF8U5$l4ixGNTsArsZQKn@z!H8 z{}IK=^Hrfgg)=bU!`q^?y_EWxB&-P{%hWOHc*z7+KTR0dfss~{T{ET-18UUyeC4y` z7LkX;k)v)CgZ*JC(n+aLeN17iY@3Q8t5y~W|Y*Xh}G||jt zeYkc!Nt6FfWWJDF1l67^09L%ZvKCqPtVcR_yC-$atoUc}fIfHX$8@_d1Aj!hDJqML z=H8$If!bL7E<8+YUiHYK!+qhlogG!fMSl?8A(!nTeD%Zemo94A>nrG`H9BmsU=tN2 z(qHT5G<*H#|E69VHH_B*HGaGAja^wk6c}{5ROgt_ctMNc^oI#he|qP}1Ger%oX*^N zDeC1dS9WN29lZL`B1?(9j1g3mp>s6ftv^BO(N`0od+x_Ebo<^2w;?qBgpA?F9_f(t zg8K94@_J%fCKWr^E-v2bZOtGT2Y65K58i9$fWz{5y!ez@&8+}sxp}+PR_cG8|S4^GFW6j~_cEy_B{0H|Kqk&|0TXRw$h+7r> zJo3CR`j;S+&%M3*D3-@_N%Jg7{bb{S;4y;o0}WDKgQAcy^{+B1PH|8A%zB<7U*F~* zx$T!1Bg6pm0ZFTubh+u7@xTV18A%?KT5z`G(6)jn;L`_c>}W$Lde z3HwI#NR#tpinbXB!Mk$X246&LK#?OFK%=z$zRQjE4QfwJ7tj3YOZLDWLtP2SRTVEB 
zwf%a39?{tBkG8AM?w$=@u{p=k0t_z=Cp^+^{#U>|Ya{e~U~9MZTY2=bMW?G8MR2-v z*VoA*;~)-*d_7mAo%H#w6EveP$l8Z(ekZ0c63^Pt+YA2PjOM!yvp%=ei)(IZ8rskC zuP~(TZ~C6F&ks~kVK8__DF6(y051?wCnol5_;+Nxk2f{2hER`<8&d6gtGWMOI9yV% zpWHQ{x4G}(Ei-^K3=p;1TQXDBBK8Lb6yA`fB+;aH?T=t~iAhhXM}=)xkExMR$(%$L zn|T1Yv0m-NYMNb7dH`gcCDn*28;$FHPhV{`kwE*?J}AKutByN) z`jO~$y^ZW%Bp0)eCpKCbP}gQKcf0B@%`blol^rgv>>g^4EFRTYm$&!-dr+wmYWwOR zIyEH&dKmb7==_r~{^5r5W$Q7z5gnQ`NqFAg4EIz4;YDwI*}lXFk(0E!dDjIRI+rBD z^ktEMq?iAwjb?vZ?dnDH6Gm{u4C`Hi=$n+ZHWfd=ZvVEy^=S`66meD!g&UtG)M+2j z66y%p@^MG^aih-jr9{n~XE`a=DE7O7-Gk4_Lg>e*#>_aMriDwF{ldRl(7uGYe)YH5 zq6u8dA`Z(G;pZ+6?>Xk6thHe3WU4Ge$BoGq9f3F@9WAh0wgXI3o+VX2DA5$ z>x+Z?aOrZi+@JU*;GT$&LD?d$Z@fei_IA@ivRMs03|D7;DAR9Q<%lDJuWxBn1zrpn zWF~ZiqHp(@E=Ur5yY23&B+)qv1I2DRzW_(g#bLt?^ZDC(z?_$ph3%IQS?i-+eK_$} z?hm~w6sE8YDd0;zoE;iB3k`(3xRb)?D{?!x+6IEA4qXeM~um)AW;@(BH2x1^K-P^{a;V0pKPVJRZ*b#^f|Iqsz2R z++5N8>&MD!MY@-5u8zmLP#!*goOlWQ+lU`Fc`lBl$+2 zJb6u`gB-JGK$tYnLe)q9%+3GmU)ka~A}astUs>6I>v>9o6|EvVM_ou};x$ANg{}k6 zUqnx&IX+s_%QmsmFKUV)LWvu~K*GRXUtuqWh_nXJ;g3*gXL2-N(#`JJ!<=l^nd?BS z_CyMftv@Sv4kxl@YKy!6L&O^C3_j~nuV6*5jc=fKfTq=kO!<0b~AcqEZe;gtdk;! 
zZ%pJ~<7-(_j#$(t&Dq#^dppM$M3%@{+fcLKE%p)tqBN6V8YTZ#)O17xmpMNzbvC!f zLT3fzN(p)ktY#3Rs5q(R_+?}l+Ws`M^Iv`$+2z6Dek4bmubqr|+VAnbv*Fn}hzXqX z;}BG2eViG|7fG+j`g0XuAZT^VL|OiWe9*)_YM?;UJ*5OF6TcCS#nXU&uc zshBI^2Y+=q2jHt2j*2ZOKjy>eeZRlY0FTx34hZL1DgtDP_Dbl@$4Dif@V(FA*9{Yr z>%qxlzlEXpD7J{p{xQe3z{?$J7OX_n4`?wJgnhVMI5b?d}DKQLf^L|Q@f5l zJ5byx3Q!;}enO8Sesf^}*KmtA?w6*`fer?q<^Gz!AcXlglk2*!B0uk<#*?1Mm*pK+ zR$TQup9`r3`kmT5BL27FS~g1fk*MR{CIQb>FKwWHAhW5+STo}>C`591;MwELRMIye zGL+{!%ylV*b$tR^Py`u6Dl3>06i+bz&||BE)0eLi{#`A@fV5p)dM2O$TT$SuG`EK$ zqD8*y%eas_3Z_Ai>)zpUs9Nv=QLoI$$?kai2u~bZKSVex3~v1x)K-18_~-7c`i8%UeQK z)r$OQ(@9oJolaMX2{G4iqH{0u2MC?IpM9KNJGMfZMTnaIKKqp)dy*lK@HIh$^7b!L zM&Br>#}AdM7aDVJw153A=!MlmjpIb z2jdG*@tS)4-x>*Duf5$RJDro$Qc~qw^s@b5<-KQ6RNLCNixCwO5omG{BnS;imaIaP zbEZk!O^%W?sHlV{NY23~XPcZ6p~*RCB%K?97xF|7BE@?v$0&rq9-rEiPUWlDBQ#p29)L;AO*R$Y;0fBU5gxynGaFct zH}Zqz%A$rLx<|i^V9N6!03Vn+C!CQ~@Gl$K2J4AiVcCP|uHesYJw0aFb)4>1OzS3! zedNaSnzgWDmmi9szuB-j6>FM(6FWiJdjKg4j@&EDO8_%)WWMp20W3fxMEQT!0A}Tm zXt{w6nM!8^tXz7|-RSS{MUK*MVYrc(7j0`zQ70O&9-6dEBL(#wyV1;}A3dQMFFm( z`rZVB9<{>o>ZL*0OF6UT*$26nWy-+d5(=bV32l*|5Gf054Gu(}x(6LiurY4IAHSK=(fJH67YZFJm07Nf>?^3TPA377WmXE7L*vQ9F0_+IW zsP2Hp1>NN-7wuh-YPMecDOiUJP3JLKBo!E{l7ZAyMJ-OT;WYDEUDdPUl^f%#-ck-E zgLT#g43efkiueQGas~k))kaRN-`ehgJH)cV^ZHqB!ar5vjz14>8S>zftGXa3k1w&o z&AAGCRL}I4Mh31*$vhlo{cnPcFK`0r+$H4hMpI%Yl6f+aF%?P7@$)!%RiY40daHbS z3XNfj?yp_ELdf5h-u}!ZAW)cv zrkbJXD8*k&wA8{K$0GY@K8;bc7R`SDY3$e2fNGn;LoVdZ)p%+o_OdfW@uM z(W~pS_$*(baWzj{J1S0%=RtVTal6>s2U|`~O4ffa zFU;uemGyy$HZiZ_4*Nv;fRdttZG{Rxj^gKolDN33Bs@N0x65Z{7=5_G%#)F%UZRDP z`~8CF!pN`lLUXh_ilmr6jVYi3>aZVjCt*qm`FVFv8H#dOj(BTRYasBJOww}87D-xA zHWteTRUuaPxXW}R>hZH3QXXlf2e+zMVca1t7I2@AcUc_wY+XCoseai|74m>t*STXD zP%sb+eC15BEIzfyVmayFM#-{`jk)G~f3c)AOA(@+>un7iFc!g;Cs6v*MW0S9Toqg{ z3Bh%FV{19f@rV|Px?n20T7dldXm^C^lZr;^b5kZcyH}=)Mo=8(C#j%;#fWaEV;8>) zGAQ_YPDsec=li(pCH&54H6lhAOgrKYNP4N2DnUB#)pfTD*YV<-*72ATs$q{#-sp zh{iIYpGe);(l?f0-I3&iibOIMeLEMu5C$CTc0#M_wVC~fs55@8k2+v6k!)@At3vYE 
zg8XFc8#lzJXtCs>(njAjTw}<=V5i80WHBqb^XxD*a{!Jho&B8c51-B=VNXWKORGRY5+%sF z;jCxHoR6d+1&uS34#}69?EG@psqL9KI-iOVOGFF_`?8E4k{&iQB`+zB8l;FLZWa5- z1BWiVDlFIs{9PWVfB%;}Ode6d?k{D^SVRt01E!+nO@DBNj$4GB`3O~oQ~1k0Dd^lR zx1xorx=6%_rOdUynUCNF*&gZCTFEBKzba>e&mNI<%p|<^;BjM$c9*l+TNfK{DX~St3@J=^n|Xe#uMI<0M`54GNsNH=Xu<;Gx+Exk;^7 zA?6j6dy76FY<^Sl%+k@gJNW+ppTh)nki@MV;#Lnf-@;}>01E(Lm#f!_%HhKqo_ahVZ^Dr1te%{ zC>{lzeUVP&&{CP~N>`3xy492C0_1b61Bt*IM1`!!%8wA2hAK%!C(^7J%6Ore6Nt-* zA(#g&zW4#C;n^ zKO>28H!S?p!aP@5jx&5_PX<}Dqlp&`x{QGR*bR=vFE55KOj7N-6s&iN>-oxx=qkVf zbkL6ji|7VI43uItQ~wf;HIpyjmi-cqn|G9#JpKNie%}R~+pfLjbZ$mJ6Q!Q{Z_2Uc&)>@N66=Y;(77+!ER^bJ?~HIaQUq~#W2C4|k@bSq zPW$V-&Qu}UuV`|&%lCF>|H2E%VN}&J%ox>LIbgfbpXkMbUm&=3`9R<-|EKa@mg`q> z1Pdk=DP}xk&Lu@a9)WKHFAzuYGQHSzH=W~8O5i=>iV|p-XU_>@ODpX0&otkwa#H!| z{FPJ;gE8rh2_yE-NiB|Hdtc6&sQPv-XSS?~B=5w-<>CB^_%@Q<3*uYpbj zepA4sb~Dku=idv`kG=mF1?gL7=87P-oAlyj(40V4@Tug!1^sGmJ%R98oE1~1ze$O` z>Z0yGectg*wmJ9Zn$s?AZ1w0Q;W{b`YNH|Y@hRK zUdshZkNumRe+3juy*IIN((VxOEaPqX_13!8j;AxUppDj<74sFx3Mq@Xq{YIfTP4%%R36uo&K)XM5|Jvs|b zZ=ga=Qd;9(qPjKoONVw@`K?0}o9fd1$v@QkF)ku|QuyN%OZ2KHCq<{PXQG#5k>fd) zL28z4U(vFqkfTj}o?=UEPW+C=FaE*OzUtungD zv$=*PGT^pZ?5a?rUSNsO1x)7;R)SiJ%ktq< zY#TIM^`V6ZV4C5%?Q0milIeQlDJs(D-Yi{#VBO%;cST-jMO1ixI63uF7R??)@T)_OJaW898K|VZg??Oz+HN>S#>!n<8^XcZ!(m(kiT{gOp;mR-2J8{)rMiXo>=Jqxq}PJo%>VcdT$qt{r5dVrXNopEN`{>=6w5%S3wlMbaI z9)-{i6r?sE*nhX7?y=+L+PT#F3v zPkC8V?04IT6*aEu3n(E}v{Fti=l+GmkSHp*a%LX%#9`j?IZhrJvE$U2bfZ-3M+&r7 zYM$sb?w1umUmk=NF1=joI%9(t(Q|*g5NZLoL6iR((NMH|y3XF4{Fg(Ffupq1q;bc9 zP2J;ho@8(UU0Ue(oXp!icds!p^ol3Fwm7e-RAevbEq`i2UswqY^5WMGAWGrv)$HWPE z$kKX=q)&yj8DiSbbz%Ur`hMZt*H8ct2xF>GYd^%t`NyD2gW zU-Z?@5d#{5&Jo((2zhfETI_PNFm&qU$X-=5>s2$ZB=aXcQ8-x{itpw>YmeqF;A^k1|twB-sHkaTtFR1jKatDX?04%zA^^)D-$Uid$(XuqQe04tgrq7kg#$VP0W%WKW?94~D;`1e4a zK5S}LRqRK~t;ocDtV_4$>DVOZU(}1~oEUgGT$g*K&{(X^J>+PYBZX-4U2v4j0z4NL zHVIy(U7k|wz~kCzZ5nelUIw>?O;U&d_C-lNkKOz&hMR`BZ5oBerA=MFWtJP=8`@U5 z53l}hV*9bOjqmB@fdn}Qacs_+7CA++#oshcDD3_L_3m02WM9kMJ+AnNtZmoz#L+sH zYE)I;L@_TA&+L}#A+4trY50{O)0< 
zly9zSJb_(5ztodKZu}0Pw^quWsb^Y_E#eTep;8ac4*$0yi-kgGOgs_t5>)1wMM@P$ z8D+=*Z^*TwcsddN4?VJ+5^lJUjwMdoHVJ6mU^LJqRNRLejL!f4zMAuc*ljj)mzi-=C%Cyq>Q= z?!eRj6KW!BpD;kfh$lTdnNylrQMypc>14LmqU!#A#K&P#5iRkoY)rS)44kJ+vU-Iw zyDaA3xN#CRN?>N)?l_1PM3tx1u5&Vnr2h${;E9|p_x|axGs>6o(ZI7?gM!ryiLqy2 zUMb$+7M4M}WZ4f97)C?wksDO>8^I>09p|%Uv*ry4RxX88^_(n)o=vsRsx1zFYkdvt zfSb;@qk-V0nef#H?*Ubez#NQ-8h6{#<>C__lvBxvufEGt2fABuGE zT{VetjBYj($+NoTot~RD`@w7wUY@;3bZuO~@PWl<Xf!htm%lr>|ma_V9vN1b_CeECY^GzKI4s_gj{dSn6%7cqI+LZ~4>KP2_WfsS6g`+uK z8XVS~KqeYLX|8uXV9a~=_dbL;^8brI1P@A5HTZ3%;s)k}=|m95mi=6BBOsRp_Br9N zsQ^WiQ?l_}5dZF}iojgF49mCgrUqit@Yj&uJNqrm1DHL)-_f$a$d;oH=-H#+Qh4Z< z6n+m*Q9Bih(zP@DcfJK`{+n;X+Fl`uQS8Rev0l65U!1RmAgF5zPF4E54xSj6aBg{} zgP%!&^r}hfbRN`v%hy;0$Pi*61nU>f()Y}-GXhsJL+N4xo;IRBPs7JhYUvc?BV{>UysXi_8oE<}-+4LXm?!4rvyQ|3W5k;AvK_M5!wM!S3?qFB|`i~K4)Kn*a_S*gy z(K+arho2_5hWb*<=uRC)hdzZp@XN)iUXchFXj7D*>T7bIpr~OLh&p?!C8O#Y=;Ii! z`*7k*r7#}zP9&c0xq~CR6C1xG$q&aFvq041A1IL^%koHs6e9iJ(W)G)AgD5T9NCLpc#$kO17HPEQfLj!WF(SvE+R) zt_SGMayALh*egE#l7(cqqVrWr5uMdmBxdJ)B9xZfm6 zqwT7@qidlUupQLO`UXO|$=nx>uwm`WZ=C#`*)P#qnf2fJ=wqvqf3N8^NW3g}^$lIv zA}O|~1MF>LjV=ITe5+Ia-|=YI55M`7RoRNv5(jWP`WFRTsVc?nqPn=m%BU%ZaE)0? zaLRq`@|3h7^&?+7>wcRY93gA4Q2VXI6lwd!jn=NtgeNNibAYN?mpykcsghl+> z9gNCIi)q%d(~#dJ8haxA-4}k?&)4_@o+5^2OM;**zSDh}216A>%M-IT?n}?XaC=KGV$3&;`E6HS3ou;E6#+2H)#mYSEaRFc|xsXTPmfHnhVB z>vr`#)`kWq^>b3=EEx43qEnUv z7GyR9411=Jy5D0qz!=fvO<9Yqqg`pu24KD%BMyH)`|ntG;c_e0q&?DeYwW~`%8Xl2 zvtDiQ`=fcvC9Pb<#dga}&g~+^_O5p!Ap5VX2)8y>&N@oRnnyRYSXtF_jZf;ftat!j zPX7_YmmOwJ4DZUOgcc50vV&Bu_N*I4tvKQ66{|SZ`=jocvq5OoXp+W0vDS~{(tTfu z_x@Tw%ZPz~LO^L*WQ>9cOI;iePl*5vJ6Lg$imuELjZvxLHj&1Y%R~vcB_gp=Ova$# zoP+s72!O7hOIJXyz@c|^zmlY_8g9T;K@eo!6fVdPDz5XFJsf~rv%*h^x;;QlaK+7l zh)ZGV!Vx4mszA2L2E;-EJCZ3R$!B&_m5kQa(FLa@5kd1V6Nz?!^2&ZSwPc zd%@EBTYNQaS@WfIFB)O$H4p1+Xw{ErGAGxQqe-hC>&*s+K|LcAsK&bd?*}KW6FFtaH#$>tI;S~2(Ra7iQ1YL}vH#c`mBd4~Rn0KKpm$urPtCroH}6%PR? 
z>T^ww9vbucM6YAz=_z<$?<*FeH`k~}iaen2Z%?KT$;43PXXCB|a|fwag_S|Zi8#s* z6cdK5E>0onneRkgm0FFkx5${&R%fu>x60%EAp0d}R0h9Xuh9X*p{%?z=B`uyENM?X zv(6*AQyngA&Be$)JlgL=0VoES2Z!#|aO=EO-1F2)pOOHyZ~L4#C&mPp9q-{iK?1E( z%gH-k&pvO4Hk~!DZrnM^@(x_?fgY~e_6@2Hu(B7{!1+p{}gJ3#(ucoL_83EzzuyhfY z64di|D68PxuZ!ZUp`{zLrKYzI4y@K|f8ccvlMQg!oh7Y)YTu+4zj?OiYcToV!u8m+ z4_r?hY+EvHP9X$-UK8l`>3-I1R$%7WukN|F@vjbZOvX3g#uF~N!L*b5Ml?p8YqOmu zTH^Sy$8E(kX+h_cy>K>vmTuxFrvm<1cII$41)UCwYAF%#2QR=>gsuqf`@AxGMB6iX zyGXn;!PVwH&eHMFo?#f9!#B%>_4)C2Ad>jfbOyKDe6~xqHv5I(qej_qO9aFsFvt(G zuDK%QX0qC8AJJBB0avtp-a5IY5AtF)y4eaj*@>$c(>iH+xb_fnDFPlvBhe3HHOy0| z&gi`rz}t&iQ>sab7yDT8<3xxVL&8O#%Mmp4kh{8n@j3gohDh7Kq3)AXkL|lIEx}zQ zOrqe?qt1;^$pN_3L}9#`-_8EmsE7+y_4B*}N}8TBv%)&&*OHAXWh0P_Td|7{X~P`k z5gjKgjeC>EjVe}qEZeHMX^!5HabFA)E=yAq!+31_qz?x#0`4 zNg(iRqTqqBY~1@&ph*9JdVSBWz@|LNS+xk8**k+V391nJ*;*JulBy+*+rFR{g5a&%IAdEpy5`swnGJIC@Bv_^7`+|uaJ5^)yUSf4!SZ@>K9 zdveWdNHq4d@u)y(X0kTlr2YzP!lI-6!njdg?fj2J+4b`p`8k1;0Afx;GbFZbntKd) z52x>fcr;j-(5#D8WL`j$QciwdJpHvxa^$Xt9oXKD;4zI+&zV-6{7V0V7DyWXx8!A$(dKw-$2by7w{pny@5%1>OA!(X-PTC1W zsb$V)6J;AF8>j?L>zWom5z-M5OwT`KTFk!)KxMed2tEVJ?mk3pB{3TZ`&=TD9g5W7 z&IX{n?_5ki+h&d1ec`=TlCa07IM zNNbyDbjiDDfZOM31m-&sagqTS3!XCAbQ$?laE%RCK3ULlt!rMtWTHC z*bZp`FbZ5VE!|b^c4UbfUeZ!QWt{q*`i!=o#itl8_q5CUV^MWXXT)V< zjMJbA&FIoevRH{9WI*|OJdQm9LO6@AXaTKSn^^L0bEqgk;QV0U`Vk^9wO9|W&AVFt z&QqNqG`7nYar2ZPuDBn?=U}dMX}WSjM3FSG;i%d+^^Ddt#o?hDbUsi+yEq6H?|?F% zjFZqLKHpF~cI1WK+!u=_iZ$31vkFY=h2Y`(Cl#0Yzid*hW-C){oQdSD>d}@{?wSe- zyslbv()^4J=8NvIfn~300|hjChAQi>+$P?l`4hh024OYju1Y`j!Y-eOtgEY9BQhw9 z?EJ@Ar4Ra|$IaX6!QvUeGd20xuBabT=P5~#GXK@mQR}y;$`=2k3JwO5e9b)H-3);2 zXORI1C~QHHG@MBewb?ap-*jaMZ?6z+e-1=%?79!TW|p9agPvZSN%wpV2c9tJ0n&+A ztUBHb@2&}Ji6#Ae&425ER`a{A18YR?Ipf~w8LU%;93Gt2;>Bz~D*g!JKpuEAZlqW9yP>_lHg@N zD5-1PsFh>HLE%#yV#I^hk^9MVCB77K{Hxhw)@L{CJki;oM`sbpK0g0@ zhp@7am581U2bQnoAPzd9CTCLfob|_U@P0tz2@V{%jn3*Ce$-i;D}U9ac-ptH(CnI5 z8T426f02H+SRcR~gb0%ggeP!o;YFJ@^Ht>4*rLZj$p1^*usoNi(SMtRdqO=3UG2XNe{rgr;}5CkfjY2Q 
z8k)abX)?&c5whz(f-7p5MzzQGzBZ>?)Dh87wOs0^wQoSyyD);Zob4kcBmHNp2KUDw z5H1}xW<*TIU=Sm8j;T?lM~QJwGafw_#sO%`kmEOPfv3wfI17oN=`>mEwkP$5Zt;nk ztgC+=MI~l9fIEVQV*9B=EL-C& ztb^Tyg`HcX0eR9gB-%J*Y53`7B>m*BYZGZ`{Xw2_5h^1$sX6NLSai1T)T0W4&+;)) zabRN8VZn^n*@5}Xd)n0UIrg}XgAU_+X4b~i=QfrBc}Wzc+GV@MEb!nB(&Ma4|9DB3 z&)Q46Vf$V5MAu$%jdkw2O4@a2XbQMkTKDgg&)Fkp*v0Y{wIazJJa|igE&F3QcbP8+ z8fY5krO0`MCieOhjFMHsP|N(~!)D+v?r{AH6oq{yK8bgBSk1fJ|722o>rT)ptw`$h_N)y(E(Y z>KU~YE_QYkx7yyJjjXAXLXEhEluJ2_C^0)D>Bx_L9_q5%gU7VzqIb9K;b6andiTU; zjW{zCQO`^sqy^lXn&bE}_KV25sfzu3;eHnvxv@MyIQ*w8_4TW$gdMl>Xhd169&g;l zpE#o#kc!BBcCL$4xv#sRJwEnYmnFt^J>)YlfBVn{axL_wi&;f7Ay@hs_EWX+_>3np z(g{n1%-L>MJBR(;{Cjd}!l#{3#7Cc=wGH10NkQGoC9-bu0IReMaNZ-naWk;q+}sl~ zX>vVCdNB3z=as4Jgp=|rmZ2X)ogCfkn8*)53KwA6t?_X)|Ek6D91o7%@K;YBo!s_? zSmc-!@{`-30?^m|-4Q@@#ZLI!-rK0KoCr(WLrkfJjq#d0E+Aqi6J12 z@9ys2f&KA;YS8NxbCW@EhL7E{68;lU`={*P+L1_>mjz`#;iY%)^?pHMP*C{c`)*H9 zdw6&R2a%CK5fGRJhip@Dp_7p9?d>!lDO%gwf|@0dRV>fcNYtGdg4^4X)SFcOMAroP z2s%9CPdXXyV_{*LU-afEx6pg_5dl@a>nw^!Ul%*taFWyp>|J%*ae8~K%e&Wo>(}qLk!>O{0hR&_=!mMX(CwlsOlrwMkYQ<`In`$SC z&zqKdHqXq8Vu&V)+OnB0t3s6$G(T$G-0D_I$%0L#{~$g2HZ8wTKp>`W50OIi6Eb*+ zZur~}Ip%OFYW>=?StWAqNgAou@h5&0aX$q;;Bys7J<}}yz_^%|MP!YHS9!E*y69e& z?HZAwVyl0$H;!1Xt@$25Wp^Lurq5JB)tQPaoel1H4kpC3m(qmoQvb{88WtFeVSNpg z(x3G3{YE@aV?f$5JdBt6ZJ=*cqn<~W03Pbj&tTc5FRvWQtDZHOa8$@fB6s)&cbfK- z^o|7^1CrBIXN4YiaTXSNYsdsqD0t+N4L~q({2{jbZ>lOEQ0NhvIH&=wM2HdxOQMHw z194=n%FRIk$G8HN&3nt!cjvjDia%Ss{?j^h$sz%|R8vnVXWv^nEXKqrJOqELTGrfq zMj^fYX1RAQbp*z!1=bj)nc`?|>s8RXxmCq&=e8og!G9*6xm~Q)1FM<`4b&9u?(RnV zWqdV>et|E*ZV}NcF3(V_8cuvQ(Pi9rNqRa=N9WAm%Oy@pyVaO{CXiBGizQF}OPb0i z;iX{xOS#YySb=w32MDp=W+stzr8w#K-hPwFtFtlIAPIH=j?6m731;Q+ywqv*kHQ~GzP_mD7qR17 zB63V1%hcC~Zmmu1FETYdwwJ{_OaxcCW+}0DIh0@8#;9qkA6J)@GEq7nODGbaG2Phu zrm3Nis!C5d82oZN#{J4*w9ZVV_gk$CEjxLh@&^G~r z_lweJC4+^P=#gxrPVH(J!$DyNLp4NBW@n0OD_I)^oMm%N1@D2%p`3Q+TBK7uL|Hmj zwAb|-U^?^Jo2Y8{<5vyT?=Tx@)D~%#r{HUZBZbeC$l*oxm7~HX8hFI!3i}iIj+YFr&qBPWxS6t1l^mL&VAH_J| 
zrK4v!2~kH(@YBJJmTmvsp~%lg#kFxpqg5)a{#L^;#>(k#B)1>>qfI(tY`L6frz<(@ znwn#`*n%qsrR8!b$CqdaHN2-Ds?A^AQFV|y~}EQujq66V$_cC%OcYyrbmfu15_B$5x-&@8J<*Jzll zz!W_tm(u%APlY{(N4$F+>AO$?XM5h@47SvZt#n^M7}t*0DtB?|DOe}WsBu*6+gO=z zl53R}I#X_-X0BAFHSCk~)hM!Lg(+IdlZTcL<_W3|%2+t)MyO9l*ox=*4j5RlIi7xM z>)%Q{*BZhfyR>dOcspjh&upa*?F^E0Sci`43fJ})$uv-Zhd)B>5})?7bHIOoe<-U} z_=57ob`a9wlmB^K&O&8N_~3HtHQNSlEO*O^^CnniQc-Wnc(Iu+?Y1Z{4aHDzJGLx; zcs$ZTEqkXnCpNq%L~8Y?pTlH@AR1Y7PA*#+5o_O~v3NqMqFdMbPq|LqahXjW?7TOMt=09cEWU6$Uw~KM=WNt~X)~Qu<4sdu&CoSA{W93ET zab((?=376lebMT~q-)=n&Nr-l&ai@B>!^ac!-tGFU+3h;#qWnjokyeYjw;6qitI~U ztZ0Gx&R&xAh<#Tm%Mnz7X=vFf_py*aEI)_^r@!E%Z_RJf0Dmp97`?^#&^a}})H1&b z)&eET&ukTZyWN|r47&`=!AJa)xpV zMRhUx7>Dwh13g>KkGTT1kL+hK{ddK|*9C-7M_~A_w)w8mYz;f6D%0t6czcA({49mwe7WS57G36A%lgNpw zmSRDn`K^H)m|2-DP-=BSdZi-_3uoDg#NHs+nRgyp1dYeZxY%1q4Ktp^V=AsD`o`M@ zo_!jAnvFAt<>r87lR`wUuVcjlzrlBJf@n2a@Ws?jt6%s`>9)QA8PgDZS3ZzNbL{A4 z_%v58i~3$#K{x?tx00sf%XtF)kBhluV|&kaTM6ei!;R!4C%#rJ41_bG#TqY`m*ce8 zQ>@RlW4SUNj)tVlFU@w|hM^-{Zz`H_m8duI$W`If^k4+Zf&*b|wSjFBGKW2wN-E$7 zkmn=S}@d^X$gb+?7$B_Vy4VnaaL=W-Cso*L0$UUvsyz{6*~{P z>a55@%qH+rWd;2UDZ-prW9p} z>~O-TGl#K50av8OX0N*^+*?k;v?oy{(tNPOdFe};oLKnOBUIT=$(?#U{Lf687r&n9 znWr`-?2$iKKHgVnoc2&$2u3cu+GWxgPrGVI8v7bTk1d3xxnmiCN%!~f3m zRHt!mIPqBaEa_rU&970>glgu@*H)YIyE%OYJ?a=GMC6A@ErCIqBLejm`=W*grz`QN z6gDjn1AAHBXSx;HG-U~K6Uv`-n>w-w_Cow4RgO(sGyVOu@gs^Rdy>ods=0uv>idEi z*S!?c(#uV2ky6xZPugfjoLrzO{m_-v*z&%b?j6ad9q^6}CgqLGJOMI$RkhLh#EZqH zp5#AoEM&=~3w@C@0|M9pqt9<0S?+`d-_}eBEi?nw^68O&A1=iqMBLg@7pv>J-li{h zd!OneTwM7UG3SY5f{uc1EbR2Uj>!(iL5t_28SPNYnLGAHnTxJ(GLI44uf@Cu>P;}>D$o|dw*CH9vy zXa1pL7Pvpz{Lt^U1D_#O%+aD*iv7%lg=jiJ`1P{%yo*=J-+6@>ak<%bwF-uf z^@UXVe+ai5nU+=5l)-(YP9>I`-;>ppVYCB%nc#!=zn4Xqref60g#|WS@P*Drlk&*< z@$0p+-&=b7lF;OmK*d79Vxm?+`20*dTS4ollQ~TBG|J^s(}rf~Rve*hceUqZx{?g7 zp9znJFH*{N{BvF0rI&W}K^*%pO%cw#yu8lKyw1+N&bRiW62xs7 zzAG^{+&urvH<`K@$HaMHQ<)>9ot6W$iz_X`Z1;$a|ihMqv*%vlwVQPNBO1HNw z4otMQA7q!W?ez}f?tUD8h$DXhWL?&MUUSApeAA-Qk&D7JNvudBoJ{hzOoAKoj8VEv 
zGV<1<*OD})jZNP2m^LAAcxZyBPDfe~)wc8M0W}D1S8)`d;0U3NmJ3Ejg}xc@jdTcZ zU8W*59+0{>&d=*$ru61>RUPf_=Oe>=qwr5`H~ZJ*$Ii8I;=F3EifXi69SatwV|WXw zhgGx#$3)&%cQ%qk4i(ipEK$zs&vwjJypm1Lm#?me!mE{bQ~kI+@y3Rwd@%l_7?|(; zcDzj%bJ}8invW!*=U!RMSE=c6GQ|+N9+5~=-4x%#nUc98WzHiT5qy;?0_<^V!aURH zeB&eO*VUc?^%pj2Ab7#mLk1QKnP zv75Wqft!1$AdWWazCNSSJ%MeiG(*aEJiPum5bbhd?!5EBz<@LN{0P^;amjpVX?{jt z=C`L^N%3>fr)r_f7M=%t%ch!g0n27tRmh48!_?K?`l0UY0+y(w=JFGVjgi9I;|0=t zkyZ?EYoe>SlblAKwx~YIO|E&dFKUNTR*5^G)Q2hS&Fsfic@Y?8O1aeNjsZK20?$B0 zd+e2)TQypP&q;k*YPH!64tOwm(yRm|mTjK7mpCDBo_zlCc?FKi<6+y)JWtp#q8g7Y zyHUA6Dl}`udjfm~D=2xeY;!BPI<5g0$ub|=bvQm;NPj*ek=C6YCFt~RvC*pGQOx*K zg3XB@W_Ur>iTUNI^9gI-7C<37LBS`VQY$vMGT(S~UM4Ov38wpKj665+bK2mY=Q7T4Koiqnm%cb|e=ld)BK7HtqMTLA|M;!fXuVAp)xQxkD^MfICoYucQ#_2<<5_A8w0 z!O32tC!hMRZQ9F0AdqPBz_7^3^rwx5ag6fWA$aqH^8qPwa!1X*_1sjZ$O0a6dA=tc%6Kcs-lgc1hV- W)^4#alH`5$Z%K%pXx^*WAN~(+R4=vw literal 60796 zcmd?RbzD^6+cvssB?b|Z8bUx&=^h#d2}uD7X(^E$x4*UUAX8S7*8%_n z2>7D8fd?L$@n{YL|KQs^R(lKpWzi&;<^ zV{Ym6%8JL!!5Pd20Fqwf;7125H*;n$2YW|XaW5&Be{zU}pZ^}_Wnum&i<_Mki@usB zvz(KQ6|)Er9}gdkG%+(Xv!siqwYZkN!oQ1y-=tVxySX`w^YVIndh&P*@;JHJ@bZg^ ziShCY@CpcUgE_cey&c`my|^7+S^ssB|G194m8*q|t+Si0lOyxr>zcoEa(9zrVfkCp ze@p+e)5^>Czt`mG`tREUZ;3G{r4*WQ&Ii@R8&Che<=E&PW{hCU$|Pi$T>NHmAXl5Tew((mA`g!`**$n z`Q*O~O7i}__y5!u|7z%ej)E;MO)Sa#-=3N@G0smj9PmNjbXGEO1%SKv|GsdPwb*t6 z0C!tcRp%)di(Ow|*Vfh!2nbkOS_%pZ;^yY&;NVzZUS?-!Pf1BpP*C{z@#FP1R#8#$ z^72wrQWDn}dwP2M@ZrOYi;JV9BXxE4oSdAiYpkH4APff6($eDM;zFTN2L}h!)6?wH zSbRS$kw3PkriLX7%Mpvc5r{oIJJZ+KUs+k%-Q7JuKNl4hy}G(mQ&ao-^QV%M(lr)4 zHa51fun-&^%oKrLU0od=9gT^J>F@79IXRh{nnI(|v$M0NrluAa7K@9Ej~+d$tE)>- zPp_=31bfoX&W^0C?1KjnXg*+vhK5W`OroQsIXO9bcz7Bc8-0C!zkdB15)u*_8M(E! 
zotBoizrQaaATTj8F*!N;`Sa)R-@jA7$GW?@^YQVm?qd%R55vR5t*oqyi;IuXv1Mgt zR0yn&j!u4lzLAj;Gz`0YjD7OtNq2X5U|^uFt?m6Ntdo<|?GWtQ1(qxro1LBg>C>l% zhKBg~_?ItVN=r-MkHJFTVFw2X{rvodg@uQQhY13(1B=*)vDm`GLT_(xzz6H-=y-g5 zEGH*-FA~cTj+Ky*=;`U{>+9Rw+q1K?BM!n23=F(`_pZIYeSUr(iA1)xw(9EYVlbHT z@$t^iPG@Il8ylMsA3l8h_RYY+ATKX(b91wxpkQrnt-88;V`D=_MMYCn^Y`!H{QUf0 zUS1<3BUM#Z&z?OC4GoQmh;VUnF*i3ibO}yOOl)p$4hjw~EiElCFHcTRPDn_|%*(7^Rs@L+P5d&sj`gp^Hh1;$MPc1Od@8DG zfreo1f+A1Pu`D03UrTF0rhK*Z`M~)Biyj!6oLxLSJFoePu59j{TUuRN-<(-kLS_|o z^bIHHl?{$f_5T?ko0+>6jQ#ebVR(G{_r%P>`cKrzO*tPy&4#8G{Ah>akg|P5!gKr7xs1E<;e7$f3JD2GKaOnQVzMcQ*YPHg zQ6S&mt{C+rVv>qDnfktSa3UIo>H6+zH$>j>J9j#_d;pQNlh)JMPokS{LH-P93nwPo z!I3rDTgwb6E^_03;a|WPX=!6_P754D{V*PTIxT)1THq)m50jugc`xiH5BAW!=p0{CW(H&)15U9~HrR$cx3? zDrsg~G-WQOgZwF%F}+oqe`aPc7yxn86sF6YK#X!ZOXOnSz)_UshQH%?}VM zv+Hy?zHAGiV_O4)6xnzcMeK5XSan_E7k2`NzUy0N`#eGR_coshAtgf>*rXioh+lC8 zwS*5uv^lgroRnjAQ)CIN|ZCM(qKfVKm zk9e&Z6;Qp*)LX%@g-`UpP;_eJ=4@S7Ncf2QKp48x7m_kCu#w;ePhvcuWop$g7nFqH zJ#8hZGg|)MDE(DjxhZ=3N7vO4EoiZ()8eyYq@-QEiu<#!j!Ib<7LTbYOG4#x%2hO* z@(KxZv!XpARjG>o>L_TjyJ-0Z*_Opqwgb)ud2JSDiCN}kc=@C7MJGU9d4I{Nh|K^R zfiiYP;-O`gNzgwI{Mo!oY9`f@2GiDZc$vm4nATel!p1+OpRp-(cFeb#TjB+3*fA%^ zCwR0$9mfW1`Be8;%>Eqz&_$Bj+FtC%vQY-uaiHZn6KEI_hFffkeKM=;jlyBBvm#FBwKx zUGm+VDygWbdVgBAzuovQcAJ(#t(I5Ggdok`y>GHb3dt=cOMzZDittRP7VK2MyCfV3 zeTK7JT3o!%u_3-pKHJPD2FuV(wd$B#895VGt}?BiuHdj{B~%tzvyBq#XXJon)71#s zlnIC1YY_PHhJ_XLY_)ri7>g8}BR#8FP6*K>jGPIMY2D-Flpev`4{>g3WNP78|*i1nQW5*_tBH$9F`~_MVEW((DT9ogOu7X{nAX`JBHFv z_SF@4hcDj7F?T9K6y=B0&(E*IC6x9Tv>cMz6l3Me{}6uu+PWZS*FJX-$y$xqJ0(IUwq(%oT8rC-i@qL-47;d>EB!6s|!}C6`Xk@k{jJQSr!81DveAL-lE~> za28b7rGC$Cz|f#&#C+Cvj@3QT;`bT;l3M2Xw+|hHeaMf*OfuZEsPE=@sbi8}&>%dC z99U5j50*v};g}&U!#Q(}l#rYCGBuQddaUIOFi|NlhbU@yY4QM{dKs^>vE0fc<_U{N z_6VuXgv4K*f+QBK%SRb;i!8vWB=~8q`?Xqms~@iq>o&fit;QuR!qIGufxD4yT>O(x z5*;r@BgB#RJIh%ft%}~y#7)+5YJ0zjK}BS2=+KO@Y1-+Cr3L2@pKLvFG%^E1C4Ie* zS?uHk4{aGe;==nQQ259L;1(Z>qi&OqL^e%WqDbRJ4Jb!q}_h9GE06FC@GJ 
z=krL~{A>P&Ta>MN7<=mkf9v!)I87tP=v(vH(c4rA_bf1@3}ns~9}j*{1cEK!=EUvj zArMejkQ5kGFYg1dtrWeIH~ad_xL(u@XyE&KJ}GgC-ZxR7`(bhvK+DE#8*5LeeZ{MV z(k{B8CnD(WF#%xHuZqvUcujltJKIlYbOeY3T+hD_v>pqZpdd%~dPRhUuM%xe4t$Wv8yBzGdv za{RMl?Y=k!gS;#PtckDg8WwYbTnh*7q&$O&wlszSLN9cLp2`oSzZGc71Z8j`)mE9& z#6WgrmPqO6NH$&|zjcx%kZ*L~6xo1Wulw_+W6oxTmJu;XA1gnUj#ooiBNQiTaYf&X zOd@1S{n|;uoY&AG3CzxDQAa?tlpNMoWY3B>}p}Qr_vM zn;--~{;Z|jwpWkT%e(S5()Y7{NM zbc~L%yii(gLiHDb3qiK%ypZxX%zf9-8Fxh*eL0k?eWok+JvfOCog4*J#T`l#_?ZaE z9IxMUWs?e7sCdd&a=-H~DV?p9^o!;^>5e9#Jf(vXj{p&dtP*o{LjQPT}4G4KejxBoiJSToh!waZ^2K)_V0G zRTcgdhX;KEO!pczkXu_LI>dZ-Qa6wz4&WsF=U=T|&*~@oNxs+oQ=8DOqgIB5m3zsR z*^ln+(^hLj??%6uPT~uP!Cy6@;@tOT3|D|6Lfr9{KX) zyk?u0N5I z0Zt(VT06Qrr!I-SiIIKVH!u7PWQR8!KLr-L+bC+tpTUPW_oF{7F=Ci}K4s6BG+(C> z1U*M?nd+=Oqjzu277+XLD;IqpPoG2&<(v}mt*{A6!OgDbk-{Q!Ri%xu7h+TI{)s>u zm_C{pwF~jln;K^+H{n9|JmY2b$K3LF{qdozV||92o6K|B^K~HjrEp7r6wyr#T7`~k}s*d2=J&@g*G2_l(*Bk^NW>B$f9Q}emCl<9k>P(-ZSHLT`o zUQ{862Euz~bG*Vr@0sq)N2yP-AhTxc&yX^d5;WY64tKQK3H^Y|v*};P0U&8o_0;zx zjr1!d7BlB}Z$%gWUZggzr7vjRAw7hiA5EYHcLU!|IEs$VhA z@A4$$0vcVJ!3{Te){f{9)ym8M)=(bomV zp&b8y6XE>Pa7{&H(Cy&6@@S&4 zvoQl@InO*F0pLOhvuHv>kBjYUVWcwlti>0^Ulw*bfEH8L8;sQi1Z77pm7C)`=h6$# z-{Xx3ch~|)AO;d}SnDGQ*YM&Z)%*Yzy-zCIVvoGoOHt+9SJ~1q3YprzqA(>vQy#U{=+k15zNVz*>NoMRl;unkOeBF8E3I_bIzZl!37s zCH5G+O#)|degqh|LAw?1CJ&qspHLv~lR?XIvp$TsP&L__-L+L$li!S~q(>lDI}JQo za?!PomXV3JlqztV^J8Mdt`s_#&&-a{P$r|S>>Q3r`#qOFBTSC2K=@SSzl^PEIdHBO zE2TVmaY@#KTcN)6+f;e>IVU78Nrre<$Pn9C9}v<&AasAXsbARi=`k+sXJX~UXsp?Y zjw*C}zZ|dT2?X0%KorjktwH$WLG$r{neMkr=?z0VI?TN&oAUbhhpF8NImilOUoR@{ z_HihS6?bBU=@AcU(|*cX;|{7ZTKw$HI7dZB3>#!v|Ljali@v$sVypVs8JE)-*0od@v*+GLXXedT^dur$?K^RFW#2{=hsPk4yyk|BVi;Ytte@}2t$Fl(|U z36PfEIsJGn33R4y5*Te25(FI_Dt4Oxdf|xY9MV7reB_!SHnXFk45qz%1e6+wgA|tC zPXnH$obgl#s*4LtXz?GEu7f(go#FdO-?o~pdY7iFi{?tlkSkR=AK5y^nPKg>8(C2 zd%Ll+2Bdfcz%G$*u*=3RY@38Ryem0*sEp7@1Fz=iG@ryDY8-&tn~J8CI4TJi@%rb< z9RypEy7dDHN8!)oRfclwh0+v7Kyp^&!<&6Ob~Zd&W_2iI^^A)8>|Tup!kkEN_jSG* 
zAz;=J8s6f~&a)-jo^?41e=Y7yD(hn7ZhTi+x`QhB9v4IzVh}QaElL~7#?W%30ta9k zC-J*6EtT-%)?LK1aU>HkOI%PGEbm&Eli)A=?uFMKN(6=4X~w*>r7LyC@zb`fS__|A zDIY}PAJ~uB9B_W4Hw)$bj?OA6;t_#p5MWdm@@wEU9?k@wzsz8z|$Ab}bK` zI63~ATl5I>cRS;~5O=qKdRBUQx<3Hu5mE<>j|ly}_Q57)dzN2LnH*UE<*=nHoF*f{cz$X~E#kc2*cK>pWXG7>Nh0BCUyPKR{-0 z(pTfHviSy2an0Zdoretuen8P~KLBDH-U+raHN=scP4$`pdP%XQW*MMO_@`ZY7!Bw= zaqq>9#R8uTqHwTiOl6x0xa{4|=wgrd?WK{Vq(W%mEZ*Ox22Mk6dc)`W=4Xkw?~qoA zQh)PAhRZ%IzyS`Zg55p-F^{}wA4ko7(PEYxoY#EMxGEigcuV%oNSeL1AW&*l6$eHE z`YmIp*XnN}1Fqy_-U!j_dk{rRm{ioBEqes6ws0C_XW7BB8eIvx`Pc4`#J@J;cQrR< z!{n(O$Kn4rQJ)XHgs*kreg8Z9Pfs9OPgQ8A`R-(Wt3yff=%g3bUh&`7(}b$C^I<#Q z9gmRm;Hnv@KEqKLD)&kU7puN`zL@?FF^=%APT60O@~q%AFK7c<{lj1j*@t4~3-{CA z02(GGX73|bU!239SX4cwzUK+k{H;v7uc;4qUm2Z@Dt^F@e3s$r}V^?HTl{P;KaQH>ZBqlnkfz`>65rhUWmMx3HPN&=leqcczaTM|Jq4IIq90|E+EJ!kV4D_D;{L%XVF1h z0?4eyO=dvLwt2X^XSPv-2R)U>9!TyJ&P3fYsy#|gz07bZw_nW1wzs@9@!Kw&dRxR$C~pYn3; zBe95d^c$DXD~j@8i5)+4H1L}nTi8X{W)5L4{u1wGtSLR5xf!^Fb`~~u8~7w9m}V-G z{?Q}T{P#Bcbxlv5xW?x*$x~1CHO3RMj@K8NZQUb=8k{l*)QS6k<|Ks2@25YdZu&N; zQfj#nkDwhdvDZ;Qy-)`I+dB$eCWNQjK#gVDBi{1Dx zh9M9xgW-%RwApmM+t!WtxzUP)qTxy8@tfu-PJ;uVA0&4WriK~UtN;t~ZHJH#50rjT zv6OF11r`_m--B|4YLV~{eh82;vtaF@VW7VA|0xwx+dwdvkGcuf7z6g*Ma_2R4lVs7 zS@yAsEniE@?FWPeo3;trkcLOX#khKJ-hevV-?um5&)?r*kFW<2Dg?d89wnP@6y50z_v$SbqN~AHEEbpeyG*=jr&sk z^Vf%^M$qLuJ5qN|wr_G1e=f|1S%tth#Y~_c4+n?&&!?!aJaBa4RfF&YqHn&95uWu)ZCaYHuuNm2mB5taFZx~dvW~hSz zU8%Or-do?}?aCNlFcL@v@m^`KSVhlX}HUuEUHLQi7lywVHk*BS5 z&Pk()VVN^(BotCnz3D6&WgsMs;fq?oOmuEi%^OZ$^Qh|{eF<$UVpT2|r)@b+Gx+Q) z1I(7cu=ipJLp7fDWD?DG^I?8HATndEf7!c#6HsyxA;~y^FO*uez1(?%6ZFbX&5zvm zg=hna`0`o8*~X7%UY=zfF(FRQhesC_REc&KuKO> ze9gm&x8F@-t4a7SV)4&Q3`$7grYwg+qu0d=y(K+h#O5ipFSWsf!CxqaWsXMS-B2yH zvc~i#(;X84^yo{3G=_q=Iub95T~OmRgDhrmzSJ@)(xKvDhoj4;#hScPx#C1TeVHmN z(sF|-P-9|VhabM{u_cV0l%6W4VZj<<;Wg^Mmk`Bfs^H1z$!UTPR@nB)F!Icy}Lno6YTH&@S1}5 z)5^)$;V^oIo#i5(gq^9NVTZmipC+gKdXU0Yh-on(=;eNAPU4k47r+xk4FIDy&-!t0 zs;+;tz%T+q=`WVQxJup}>v{vUaCzVIQs7D9kkf5|rCH>PIcXXCP$Vrd94w(M!KY4i 
zt2yif&>T>g0L+NOM*^XYdsV6Qu?GWk`;oQHS0);>mZJ2)rqtv!>t6p4epe0Ef1n#r zGpjwRL|E*_&xGxBmVcC^2ASv)YoX6)tn3Bz^l>92$xkwbNf)fNOd87VZ?p>G{}P;b z#H57DZP=H`%HRRN!uqegeCU&jKOv7Ox-$w{s{z0TT?=Z>o16CJ8vwl+9#Er_0Yt^r zzvT$Dhvxop_R0S=`D9}v>!T#WSYdcO>_t+E?}P$! zMjxcjJ-6^ZYgFbXbzhA#d$GFB_}nhG6PtP6S7cZ+^@gN3rwuyUs6cm@Y{BV(S&&q* z+MFiT+$c&ffh?5Bt`Ovh_nsYJeDc6KGsFBqasyZI%OPT1T=)!d-xqzezU4k8O$ zVL}nz%~Knjf)R%>t5{)51fJ|ebm2|kcv=Lx7dJb^TjWoe(8QKU{d6DU1$D>VgR=)L zkRoTUaF7pwKb2nK+h7MrTW8N$eF_nv*4x_;%<{U9u^fs+_&3A>A$+1cO-Ex{-lmZY z+T}%u-|V_$1MI$wi9-VUBGP4>2VJCS5W|4xj04-(j)bQpsy5z)V%*RSE?^fgnYbq= znstFsQ>`M~P!}~+TK#3-d8yCE@$SWfdxGM-bSu{v-7oh1JF;r?i-r1<%c@1^<#1g2 zUacLuT05QHJ~MZpA5cXluMjXXG;g=^>W(uBYplNE3kA{yBNTO^$I1DgEqvSU2~Ty! zAc?tJJhQmtHvt+J?ck?Fn~Txp9(VC>+RI(N7k(3O-J9MPiLzPb1rk!9mAO`*ZE#ei zlP{>{d?k99?E{JnSq3w0g_35JlZhFF<3pdWHu2B!ewuo!?Xm^2H5tVhdBV;aNSGi2 z6No7iBAK+l-tmCl;b52^qQV+d7aA#{3!M?gBnd=lms!QWh?1KDf;z$J;!Tl+8M|GV z&Md!y-Ybt@FHSFvIVWWb5MfZ3B(AALMTuyocQE2un0dWW%@gW3Jw>sjK+g$D%%8uA zklXNh_qd#9tj%HMVpNpOFgwp0N@&_iT2YMWKk{R!I$EyTf<0z9E@CmGD40BvZ)RAK z-p2YY-u#DggV){{bDDT-=(TNAtOX58nbr6$VwJqNfsDLYKRV`PPI#yM1r)D|7k~3% z+xmQoZugYQu_5c3UfexL3*l!zJCldV5FY#LYriaOc2?ZYZP|sr&-`IpYv?O{?J!m) z?c!4<+4!N5K*U%7K>KOu?2yBlE|f>R{e%(Wi+WY!^44-X9K+bwHs`Q^C#7rrMVK7I zVor5yvy+@j+V3>R%aX$}(2L2}9pZ* zcZ)h3&b<;zTk58>kAi6t&iaY$8M&HJmXFY91j~~}C+f=}C0{o6TZ|JD30ImwS~kdi*9V)085Db%rOPJ-bn10q|ng>`Qx zn@3AO%xS>L7%+tn&JyoHcf!}XcC>B07rwdFPZQzH)=aOt?HI{%Z!BxX)Q9N;W9dO z9ZB96y|}=qW}$K|i|%rMXulh^mB-?gv-mWT=v0TEfYw&1^qnCXd}8Phoc=g?K8%g` z;N*R$$!+iE`90p=GVO;ghRDW(9~2G{5XmKFK(2mg1-3@CPtK{_r&CyM?KfGlO7@;~ z5A-;jeQ!>R%WpNVG>Q^b4q}=Q+6ojz;Di1CuPgl=7 zI3cT9vzlae*u*Gg7>1$LS~NBZoj*;Te~Yq=^dQuSr|1X%LEmf~48*_eR4|AIk01Hc+a0zN{wn zzieE#q9p=Se5a_fqb7x>Fhi<^Km%;$w}D|F2qL=~8y$FB#-3^pC5&q1>5nu`AanD7 z?fHmo$uv%0z4+x)XTaIc+vE>fDs5Gngs$E3P8HU;SU z`}KiI_{gbh^brXCDZ+U3tf%+5 znGl12@~#^qEw9P*M-*KceUzGN0l;f^+B{%Ay_y!-9aI}~wbF|`*==s_{11HJ44ZPG zks-oLo6$(P`*bJfWKD0m*@#bX?^oVMIBh5)3)Knvq;2l5&X5B&_EhvHh4O!V 
zQ-PjpfnPXdUB$W;ez<^EYVX?>zbV)zP1#7jCh&ReB_#%B^oNWW%?bbB#iH!dSgqS+ zQwxj8Z^C?^E2ly@SeIGf1-{#g%8c}20JHi^=iB3Fl?%Hjf#M@pP@mL0i2df{Ph0AF z^;MF?YdwcWWiYmqk#ok}SY<#Qz1}|AjA5PxW?5=4Vs{Lv&p>BX`&okrRMzY*x8Q|i z-|tdhU`%OQ=I|`atJVS9B9!d?^Ta+Vf*NrjWpAIF9w_rz8c)P`RV}4)bWNz_h69@+ z_SARiZeG~OchzC8(-nLbSh%<7#5!&upY8CWf;Gy3NeEwVx6P?*8fFeR=#33Zlc+P&YKrVL6 z7E4D%UlK<(ZKLEd)bQ<9carABd8fmLkIi7@@dX zC_tUQpxmOMiw9Vv-?a$3PynTzoF>pd{^2yyeD1~@J$q4S`{w0%rLlM)Oxyi|2d}G# z?1;j_XsFe7YGpw)-WWAm76TAk=fFW%DB_Igj+UtuB-(3|LLz1D#0Qt7ejG5{8P^rC z+m>70SZOR;*5^HMXe^*WH1d)zBz}OYrsspR;qtmMa6?E$+USQb8R`H{*OY`7CAC;o z`u4P>5>qh)P~;{W$iexM1>}t)loUEd9!*HJrW}5 zi2MlNhs{Y9%_#019A=$F_Y4>#%`W)EmC1kd<7nF|B-nn3xc9+D-AqYQ-~PRg62BS1 z+5ADbI94n8jn9`(+VNM%fy@7UdN3eq!KbcnVG9D{xw$c!HgmTl@}IgtZYr@AksLlQ zhlxXM(8JdJyW9(WuY_DF(OHN255F2&{G8o5laX4MQOz3dp76d6>~hk37C(cs4Dva? zZ#U03x6Ih%W)z5+77Oa-i-<)mEh*APUcz0oB$)g&w`n?_=L8oTfcFA?LiVJT>LtTZ zgGOJ?$Jl5WDP>PPR!?r(mj;?cqma=o^8>`4kmhDFfyPRX0};sxa{FWUgSiDLE?J`9K?cLsp&Egmr#vPsfM*O9LYl@^W&1%N|( z1r{^Gi1#Xha~GdN@QP4R>m*AyyBovWWYF9a1w00b~=2h;AuEunK}!Wn&#xHMmQIz|A(svsyD>&q&AhmVLg z!9>R9IDt2oN%tY7tajR~R)z*4Ll;`B;VRYf0$i_`_4m0^r}$iS9b!NW0jlO}ocf3( zlt(Scs3pV?J%f!E0*~SH0>{n1; z+7H)SXTM!z=K1Q*U%HrDLKWWk`!C}GT9cq@ahBY`6vJ5poQ7f`b*$+yqAwwL(}tsq z>DG0f-CGVEw!Nj1KVfodA`w}*oDdZMs)n7?_?v1#N1?td!-MZ(A5pXEJjHQvryqNU z-isT{QC^}G`ZhfRFD_l73VO|Ps@*t2Kfio=Sz+gM6PQUg9+tP8_l^Nov?4kWCRh^| zjo)kqRfqrwhseg&Mrvr$V+}#byAq2~gVA7VyO*{v=5L*lu?~iz-mt=?_u|ZdqrmpW zlS%~*-fhwUg8oFdlQ@(=o5_&VTv7&Gydv?;?1^p06%+1}DLa@6#&_Mk^>J}qF+LcQl!$7y_G=a!J${I%cSJ&CCMDues5#jT-Ek%r5qYe^JU{=Z+5rzQ`f>8uhEr4 zq5DV@z$I)aWZETGThes?eE!jsd7WEjY5?*iS~pbY_S^*1^M#E0ffAjwT*nlrJ4h-F z*WYn3SC-fj3kAIK%HUpw@c}OG8A0Uj=D$FetwNfe#dxEBkX;hunc~Tc+kA)W&m)LP zLlo#74Uk+3om`WwnGp!@R$JlG@2gk^+3##Zq+o<$s zI5Wsi<8&vXI|#4oR~`2P_((6}>YI%q)1eHEh*K1wkH$1l)7YDa-_fkZ;+yK#S6~g- z{tFKEypM{iO16kIIAVA^p4GpkZMe5@8ik6HGlwVLIg81l@|E5g9o+^a$DSY&dMotC zVRCGA&&k)w49V9(t+JroM|2~hT(;-h#Tg&|?%^aOx~hXSX-iP>+g^brnI!dI%dEzx z!shE-)7&>UxTP07{N(u 
z$2HDZ49LzforRivl`!hvz5#&ju3mJ$a!&kr+;$0FU7E?IgIy$DFX=C-?e6gUh9eRi zJH`FwK`?@uIGV8`o(y9u{jHH1oA7nEKjGsCd$WJoiNVEVd65Q1Hu_(cO!Ahf3wUQs zZ07_S%m3?C-j^=scTm7ZFXVmt(WCrBA<<3M^S=^=pq#{6{o=Dem0lIK-tO*->%^48 zbe1r=_PLI@sS4MCH-8hR2^1P~b90Z?dNI-oc?_(7&l75w<_-VLFBs7LkH7$uGR&T9 z+`gu?ws%FNF8sk?rh+%-3-k9s5;|cMyNvp`6p?UyFi;)y&Ci5xVp@GAr5lCMj`QC+ zj49D4A%cW(RN8!F=g)4btfx4i-0$QS(jclDYeDDiL9Ka7-El-{5nD;k&`~-A(Z6Ym z0dwqBU9rggr)G>f;pel!sDOihCM}CZDGR6SwZp_4O{&$yTON+4zHoi-?ARW0x9OhD zWJ2U<@ypmxetUH2&|+DAXy1zOffL?qA4O!ZC=T=N5s1G=V=~??6X98Ke?7U(07dOx zA0|e>$0Mhm#u}2ABMeNyX=6r18!6J0y?4plx1wv?GamCNIHrTaJV&+s6+_2dtQo0z zE_Rpql8y^@*>7=ItyeT6TO51;d}1Jd%|qh$%;;_z&Jr?HZ(fLBjlR1G3%_&yna~QdemGs&z+qPuJdej8s0m=?Er_@p5p11 zOQ0{r)nUmT7hO=QT%Lm0rx=_g?`@}_H+VRBWGiuvo zLSjT83aJ4kPhuo+0uRpMK<$|--tMPENunUV#X?%dNfmr-a~zom%#s=qk9Ccxj4Ula z&{P(|2Soy2Y6Pt(j%w^oBj7S2X4Wy+yuCt{Wepk|B!>nRgd+7EX{(vo!36{Rl!Fq4b_$3L^S+|tz&?Qj1 zYP3{qH}`}9pA#@seRlL>$_g0%auY9LL67%BGdy!lx={F1xz(l^y0D&nt< z03WlCN*U?FZDEhniM0pREA_LhcUVoZuZ>d|*pq9-^$=H(K+sioYUg+PtElSQss!R2 zDFot3%rdnTO_U?}{82T~5-$2D4tlQ@Y8y7LCf2!_%CCQ|kO9=8<%I{lRNrzrzFVkO zCAsz;kXpQpR3!p~)-~P-mf9u5RIBHmxmX~Sh4njkyDVB>-Bljpo|?P?SJIf@7!Ofc z-Vw!`&uX6kzN1ylZ@znUnaOy-lsm9twIz8IIE{~#&@*Q}e^+gSod_Op{mltMVCF#i z^qP_ZA&r8C{q|dU!v`T-*ylYFzaV9mQa7;dZJPr$p9$FgGy%70;F*Mfl*>@7EPnPH zBn2K4^qW{8==C#3JTsLvJGM2cMx6QoCmErWtLf}d)|z&(!*UR)AuNe^%g}VFf5yD7 zWsE@C0uOdXZiD_q7Q5UE$#-dYAsTmq(_mEs>#ojNIL$ZqJe<#kp*efFjt4d#2@9R( zr4>fK(sqLul`kwUW-p@UT%GEMU^d$y8!4EGlipFNlw=C}b|-d`7f>Ru0>Fuz-$Lbj zRlg?Ev=Fu#9VVyujvs$E&0Mm4zO-Mp5ib?}9_!r4F>*N+#w)T5|;Y zl~&LwnbEIv6h1EyvfE~Jgi5mp;*|c>`6U4N0!gS=C%uXaR(>Qv(bq2-#wFC9niNlD z|LS5WX6=80^WS^wB%lGEKM@#m^?N5jTeb5JTmocH*S>+vi(>m+Ve-3T^MxoX#7VDc zF6n}fy@6%?!hmYVKRNo9D+fEXqZ;G6Y?wf_$O@;wPg3Q7p# zXmCqhtdy=hbuin-cpl&!a+$G>GGT`lO_C9!Uzt%>8|;{3gzU)%Qs6Xx41zs%kV6C4 z=a`qxsGX#nhlgLke73Q>La5N}tR-*lM6Yo*3q(|fQ}db#k!BA?pk#VQT}UnNRe>GY ze9dyVKPp+njtkzEsPIjQN+2G0C3YCxt{X5UCvNBcWcJn`JEbdyEH6w&` zs4hr6@pmLhqvcMOFXHPf_0B@zU0>Wx=k5*FlE9uG&(LA&8l&9N^ba8?q3tG1_~<(7 
zW~mP!QIm@gg^EU+?|@>h#a!F*c=TTtVfWr%^zEtZUDLpxvAG&y2YZ^(=ish_i*GQ$ z64;dUrepOH*ABdX*xygBeah?toIQ+>lE75o;3`7O~D{Saw1*u1n6$%MRghi&L6+Zhh3A zg_+ANzVW-NvXc9UnnYiX&kVNnpJ!={;sZCmf{8S{m1;(8hWd}e3>Y@p@qEdxK9DE~ zQa`*lQycnnq&M>sky2GevMUo&sPaVKwWWo?t;?H;I(iX113;-*puZPwjkH(lC%DDT zgabS)+-sKABSF*qPQY#(+P+>Rv@^Mo#w2n0Zf9<>ZBg-YIx#0dcfYI`zbozZs6DJv zY-94RdGTh!kpg5>Orz$4;hb`;F_lo-sY@dByax9!_bC7+!L_-6Sp|HoBBojchXJP*_l)`7XV${pP zdd6cq(^wj2PV-Jy)j*`@C~MfZdCpCO+_AyoWDE#b6?o0f(~r|Y6w;9CPb`-}Ng6{|XYjG8O>B{(R!{v5!(8;u`=bPzNtefO zdYLaZRy`p_rk&Ds(iPf{G>9hzhS~f8r~xouf$p1;5F^gNItBc^+oReQ@Ay-5%kkP! zWW!nU)xmu@;WO9hi+Sus_)*2{f&*Tol_7$xYz)(-dnzlIPvx~>1gfQMbg6c)yF7TG zZ`fbc_whBi^5Ww67Xh1zZ3G4No23CHzCO#^A2(d*ozKcYbWzt|eJu#-Ef@>>lm_Yt ze+7j4b#RyW2420#mbmV*h9?fVMP|f7B9Q zK_`xvaRHCMmC(lZ2|>ClWW)TkQJ2iX37y*%2=k(k6)&Fu`em9})W?YR=Hf3DQtZS1 zxHxj;#?{~OhNS(ZNUtPnZ8L#aO)jsZYs<*aJE`UCFZxK!k*p2t>mH$)29G~sUl+qi zl| zf3ITu6lmEwTD)%^(;#TeBRZM-)T6l|Vsv-wDVvlAZYzw`N$pTq*t>BY2vRQbybCaT zR>nl?iSl|?`yWVU{2R4jwa+=CPQ^}7a8P6U!-h?l69e0B_5TG(y{w??I7Ow(08TTQ zvRX8u$b$M{hhe0oI@JAq5ZmolAmjn{64-LONEhmfVf#csFy>B(-vY4wYVU*JAM zJv)E$;Vw2H-STLD3*!{y0651^U3HxlhWk-QpyvHdBp}RutqIBgqBd{Fr~!Mn1EzC7 z#fB*nh*?UH=FEzC zHKHl@#A#^r@nl$ds<%@+vTPYTvgUZSp}(+f9K%~~PNF3g zg8tz8M`Q$KfS$LY=Cey!(3D>;j@L7v47?UjmyrmR?qo6CM~}^be*70}fD33hqEQNLJ# z#`z_PO`NAKc2Sf0(l3 zI%sUs$NxP;EdMN@KZAeylm9;yLN(4X2!Vqw>`hZdfE=|s4rCB-?$R-&80u?nc+s7* zj*FO%neGO8$6C74Fsdy+f4aIW?ac_pRJ>|+A6woM67Ol+x_1^LhG{p^@HOS#!1;7s z9g{3rf~scPt4cEBC2SzN`vgJ67;Z#1oaOXgUH*-_DN)dIXy|0grU^yA;qXtVry@nQ;ozn(dVuK(>+6Pn|K}E@n>4#zZg566dCQn*C>$AUn zl>B~!fxWb2-SGu?4J2+=NZej{ttZ76P?~*Bh4wKEtcY*)r>(k*{6pXIv>^4;aHF*O zhe2NzV%^z@^yofd&mzU0>vs!?G24>L7tqu_!qEMm zW+BkuW&MBA_SRuhwQJw-0t8fK5S0>!7(hfRL25{)TM_A$QfU~vZ-^Puab3n5U*{QVLjcX3wn}f& zNzRBE%N1)ud8DG*(}RR{K-W0TXmW2kK1{03NA(VB0!s4x7+d?@{i0|+u^gJzVmi=6 z8B>Kh=dgZwaqgU+l(Dy5Hfklfh-9vAs1J7syn058O`akA#8H^r&?tW+s{N)hI^|Hp z5pkn^2MGx+pZUZmk%bLAO$`r9W4UE>CfOVe;jOc4wlxN>)KRMh(#mt3n_hvIjX*prj9 
zsM$5=j=j{9;3=0Piqh)Jo15RvehUk}Nhv)bFs-PI*N@QvL}hi)mwQ{16TVZ}J&;A; zuZTBlnK%?PDHnX+s7nidqmd;zdarlZ0cHOn@F!}(OQB^xj6JX(SJ$74(~}KF+^`&R zJUBa%j<%6w1AL`vwga9r_i#x$l3^)UzFGSa`CEK=v${?9Zg+i8F{3jL{R;5<0axrE zoFWU=xQ=?oJliKuMRGo`*!*^hvsaqB?TO*SabF7R#+{g00)}EcHsezn!y&ZLJkJM9 zr6heCF9u5Y#Q^l9E{5GV9S|E`xgv4>~Ka8`?sBT zJz|B=Se>zf?!i#u8EVkyzHMJXuIHA`WeC zp5!FE<697s-Dj}P7RPL&3*hC~WaIYHSq3m@!tcD@tEMnUUTmf?qH2CyNIx9O zZA{5qpaoTTB+nJQ7k>C^*i30p$iD3>;;I&!ZpK=iENFewx%=8aKTxyC!5!D^F!8a1 zpcyL~67t|@wwIYv-qn{u?6^8*wtM%{pCI4UAaY{mYW&YiH+K@XUc&32Z%P?{(hHb= zfix59*FI%~=aW9k(Wx5=$$><{DJ-iPzgX6F43en9&-)E0wqM@_%xa`MSkuJ#@(ca5 zo@9$Lpu~jrD+k7gqwf}o={4C?PN0tZ4W^=p6kH|&Us)c7ZSkFqt}oyb822Yns}tlSdS;#_a5!`b!4qf{A)4}!!8S; zn;T{1?gH_0*kuDgx3phX96b$B(4KDZTP7kLDy%Z`vf3rED_L?Ikm+wEUPQ0S6lMpU z?CtbQ?56?!nd8l)Z~arZ)@%WNxF*sy z#@Rss^5bfgKcw#w3})sv_wp5U3Ot`=^fT#;+sk)rbtrV2<8D=TR4{IS)zU@G$ro?y z^MC_49#zMFe%P-)SpKaDQ6GT!(Py1dhK>(~*hp#+4XjL`W1y zUZ$LnMmC4Ko}BCrX*S@P2arKcz$->a^Yi#f@AkA&z!pEvex&c~&l#2T=PMis1ae45 z*nyUJ=trIuxVE7xJ+>MFf!S+(xg@r_af_{PYDC>!uWu7io)F2t%#R4LsyE5}#K}Ur=5=t3>UYKAphiBhs$WH#~ z^6hc)^`jy^Aqs6@Xs%fOZT{jb**OUActuEAS1{Y9O^k*U%a1_%(Ufp=PcdsrUeEA%i5xF21zYJmtbG?EB$~ zQMiuE7_sIQW&X|2^>q&qgb>SH-HA;x^&a`qJ&Ee7ytXPogp;}8lk}%zia7rByAlV+ zQ#83>-t38h!+99kCnWfGt9)glNPMJs!M09kXHkqP{1FpnTPGCsqu0DT07Ju->*2OP zSum$bhqp&N7oGPCfw$)Y4DoE(qIJWrXOSQdGJiri-Wo5ng*qrfI28@7X%GNw^;0&V zN7iKAHKm5AYVcbgEj}m&!xU6+B=}Sn`kgnp--we(r`y;|Gy6Il^hq8cv!5VaKiez&_mIR+h`h@t&=b`n7$XpP(w%!xWzKx{1Qq zBo0*q^o?%I49>Mfo&rJg*v0Yr9`*}+GNmMdVkoZT1(9nTJ9a5^``joSuyNNsY0TOt z*B58QE68ZdqN%B?BhnPmhH)Jp(?G(W;CBqpKMfV9#t#+UH5rP_Z=Ek3kvQtxRGTw0 zkW%>o`l#L}Cb{!J6aMtc`sH$O@iM|`Wd0@;()vgxbG^Z+))xtdc&ZG_5Ti_U0b{lX zW5=4CYO%tpLWSS}@x!qMcSFs*erD2}0`v1v2%B;G_pXCkhyh6+URB!>BH-J+2f!yC zg~d~9+_I_|j1_6*F5q>(Is#{*$jGZN_nqVTDiB7b@BYB*8%wY*mY2;gBaB|2RtfN$ z#?Xn2S1%*f`^lpWI}$hWfO$32g_g0#P(CVT!Dx7nIfH3zl0gZ*FJzdf!?dCQel4@> zy33|gWaKozk7ZS?L{6J^-8!R9271aI1UfbDmUd4lcfIE_&+xQk9tm9Ii~XU~>U{tJ z0(tU-6o=pt?*kkPmRQtbMV;S5DO?{J6T})ViB@Ky!S*b-2@FZQ+*xz%1%ukIo5rBb 
zzp@K>`&}pd2pO+WlthkJ2{ANrEY*#yz1vzd7V_%9B;~YQv_C$ipCS!k`{s4j{Ks>>#XJ<*J^Ba

C%fN^50!HT^ zOt9Z|c1JZL zxDLfrMkc@3Kh56qupU)Nhg`twW;((Lu%H16^)7}-UeYd~N>f{cAo$*#VX=1)AElwS z1SuIQ56|*-Z~O{CSeiCm;2=h3=|bw0AtLkwFc=m6RyWx!Y~VM3Fj$pew#^X{Ee(P3L9>160bvsUH za_~I0A>MYI*xonod=~A?{hk-&SF?faeCzzdt$`NK!eh9QBXwd$m8|n+@aISadftTs z+gf_QFawQc*w6*zP3;IMZH6br%kl@$&0B(Vcm+onWiomkn^qSHFlm+8e{I*y(6D2A zVt*W3|NeW+o_xlSusVVmQ&%Qv5;l-NWOD~aJf@+al*ZX`#shsx`RRjS>r$30ero z2(k>bK&PGxbO)}7usv?aUJDHetbgH!f(pw8ZNe3tO_V*O$=mN8-ml$R5j@!BRq1n- zEcpTb2Kp%qwtDLyc{c0?Lz_~(tEOa-X=?@KinN?x-{fg6Y%RP8uNBpZx2(K;vZNmq zaI~e#D>*a9pZ5@QeCs4=29nv@p78}hgL4X8k20I9+^o*B==sVVx4x76EM&n7m=qKE z=wOkO$mtm9D6A)~qdiPtXY{1Gqp-ARP99nBe13XCvCBnU=j-e`G%>RNvdDyMkIeSy%;%m`xQL<(uI1%X1X6H4)H1OJXZ>AG7kYF9kf7ji!V9@xw)W zO&*EIsn^zynm$ppB$s3cNr<<9g6K&uZ)dDpV+VG^)g%BHAJFF!Q-4nM ztLI=ta-L!4dZ*r&zEEnw4ei)H1tys~jSO;% zS#xeo+#hFt#ItYu4O&zJG8!m`7Jz0%Ipm0A?<+7)x=0HaM8W7_sT%)p;2jN_c6>sv z$WImA_Qf``Ts^1DN}NYhKJlJJGnjiZmt&`b8#$?<6Hki$5afyIZxR4maPy(eNBZPgm;dsM)qBaeWw2gn9g&cmWzx^vhuRh5sF|Q-{CSxg4F`9=yr0V-+FsJ;=Cedz8u)e?CnIX2J`>J{ z^v=2p?d>>bqiF@6BVz7(geTeedaSLO@ft2$Lota2h}+YA@>jjH*DootOny)u^36P_ zAe;)Q*P16ZhY2K@n1E(Lq=B(9W}k>lyRe#y`RX*H=;l&GQqXzbz-(s%0?{nx4L=wYlqb}Ufe$jFQVk@C99!kz=cdL|GNu%hS{rk-lup)ZIVSO~c&k6u@V zu&LymHWHqfy@ag21ntS_mlKCvL}sP_>(}5(ujgCX9?QM2ub1fgV^`5kj}CSn_|+?8iI?~O6HntD8|$TSdKs2p zb^=?i<9#i+a=;2dz=QIhvCN!Ne~~h8Gh?A&RhmDAGOR|siSRd|$c2zdyaW*e$wF1@ zw2@v0h%tO$A~>OOH7Ze5Bo5IXuPLM9EbIl+iQ^GMCImU5di^89$>ybjvI5ULTzy_+ zML3dWG$J7vlQnx};lm*nhK<*c97s;46p$#o1$N5Oh&#FHFv;=ypm3U}g(r;N$0a*0 zq5Ze`8;(yiUnyz%oHm~CzeDOyw!0n@a!pu93v9YA8!0as;l$R=VFR>|xk>Y*15xhE ziV2Jw(*xA7_)$TtDH0$^4`nnU0lYnOv1%#>y-KW^24_xOD?+8hQIIWr7a=n&{lsAb z$URq~9Hq6kIQk@ z_N8#6$3Oxt!R9H=VW{3UqgN4NqYYq;X+*GM_NNLV{umY|rheSRGd}up4^S;`vc4li z#b5}D%EP>)^%sQDde3*|b8CcPbV>p@(hIz$V93CbfU7FewXy@Pr;bb!JCMi_vx&}wL=wTqEx%*HSvI~I!Ucsq zz;T+V8wS29NOc2k7{mKs_~}c;fFNU5khx|4d@&6WfVlvyLs@XljXL}W6RB_>VNM-@ zlfe>ubxv6|=XZpy1|J+5Xqr#urvfyn8nA)6c{y>(7||pEXbKne^W#Wp3#H~R1}WWE zGV^Y%JDASEA@*jcatJ=C#BApusr@hzU*vSGqy$3sG$QVronf!N2%xu*(c}EdPELRT~ep)=K(Y@tF#PR_`BuH9wKT~X4CDHFk 
zLL&s&ASA5P8*P9kRjKB_d88yRIqvod$JkP_28P}!^m`1|R0|J!I=Zu)DmE^nBXjD- zX=|oF*$Cgg3y4Da!+){6kqZV;eflggIF1n#rL5tx-f2zJxC~?91*tf$*0DwZ*Xe`9 zX90J?LtPOY&7t&xSlsui#E{7^6=Z{Mz+2oFiak3ox!Ow_%GiDJ6)vC3CArAyo%Whx z&HJKDwSU)mFe2`vDSz^rKW}I%{1Hw@B~$g! z;?yN$H^L}K^`c$A8#Bi*GVL!;yxzOO_CA^O%q461>i_mhvCtoV;8~BA0!h_dF)ZGX zy8>-GJc%W;xLaHl9mK_*`r>N*NeonKIR(?T=ysR@|MbCkBC@0stJZPnKkkitvO;~0 z+3re1GLl6?ev~1o-FeP}TF*IftD+rCiGXbi${`j%2Vm1w`QOko7rE%ds#Is?vO(mQO&4=^bw`Nbb=}DzFF3!pfJKSch#Z~zaU6e+0-Q# ze0@7qYD(#svO!E7jB;EG+%`q1L;a=|&-|}Lqa;_YVCidbehycJDnPZl8bIk%A%PKotNtAB@(R8?Yyqd1qbhW+q){2`pWZMs*# zR$C+w-FKjRi|ICnL5cAbPj)qW*jH@vv%|En`E`bhl2zHeb zBvD>tH~+B!nbW9FC1W?DExxsAI+;!cr_?N_%Csw}OEDaDWocMf+3GxrB@HE<3PT@x zDS*iei-~eed;Lj+Y{*ObF(sWMA)y}5b(x`iSV)D|Ez`eM4-1tKl~}v|2nm;zm4=TJ z=iPcR+z2S90?w_1p$dEOIvi-nx*TvH>cesn+k<>hL1 zw3JxczF9*PG{29%YF!`|CpRsD9E7Q+s;;-H;LAbl#t&+Y#p=cfp}(~cw*4YyK)pjq z(~<)E)j*?pfINaQtc{L_WBlF5T8IBoO40N>Ww3Mjl$kA!7lcU7ZFdjedDdSmQ$*QE ze2T-n!ZdjS*6nTQa6kPw&lOJK-%DdnbwH)08URBZ4;d>5szk+1rK+0NXT# zAKf9HKl+3qTTGkO`P???i)}V!yd^%D^aV@qgyk?Puv}WFz3Eh&l@opT2w#RLB;Mb* z3(SBoH5rKAyzK;Lz|*^O!47_}7smvL#lsF(_<=E;SM`jlxkU|GvGen%s=<=(CapIi z5n3npGR?#>O;cZ7#7XW}b1uazhB=hUKH>a2`aX;s_lTJOs~@q8xO9z$GlN+$0;`$L z_GA(s_HhWiFJY&QA@BS(RVfd`bh7Q_fOJe;K0|#h-T>^uD=)QXO~l21tC)}=N31-+ zukHSf(+?HdY>1fzPc|s%6oR726mEVANaM5GkWKBTQkwqp5x>k}?GPl-yIjc>q+a%o zIXVygunwDMs=QWsZTGP$jX~q}vOvog&3fWFF?8@<(e})e_Fi41-OSOj(3v4J++^(q z5zoGLbux9qOSv@;7CVX`y7&JZ2I@ChC5wW{iLX{+(-Bxax3B)5FL|wylyoQ;5+yiE zQTX-KX0S|j$HhsQz_G0Fo-UbqHkL;jQP7+CLH9C}(mnbuF1fjuEYcu@j$$#+=r2Bs zFe}S?#|mbbN8ct!?DP#TQ=dV9YLxZSRr-%?={31R&~s-euhtrwPvf z45CtFdA#qPas-|qVyD((XJ~TKA*a0$srfOjbH+lfLunquvdCu+AC@_o&mRb_eZCkP z7`6lzXLy(Z*&;hUON>FbMZpCP3T*~(`OT6;6J1rZhoT^Qboe$WeKMTo_L{G9`Z<#3 zAf->9h@73%%>Ow`3)@7+4lSdnA$M=cMGAbK@R3`PjP}XQfJ4l{K_mN_ZQ1~Kp7 z`~0vyY>5qXdQ*rfAeF_rvWVKeE-UNVe+IEgycGTb${?#UFYZZTCCK-OgInK1akJ~* z%lQRMtb@;0egP-}<-UJGCZa3d`}c&%+LcQ|#JW%zgl^uA4lkY^@U;x&;*Ckt=%=4m-wQPxhf 
zIZhl9xwoo@b&fCMZ2pZPs2d4@@4w*!o|j}1Z--OwTH<#Pb{Ut3WSnh@T(97@QwVH)OrP6#^W}HZ{O2Xx@0D>aZS*!?K}}9ZR2{sewUB;wdsb+&(j= zvHZUj{yt~EmeF;UUpF1b=Q@z_m_e&^E4A=CKqY)Y^p3OOdo>IPrDm3?o?<8wy?u54 zE@7;?hBMwn`VH6fB8Sj-qo7{U5h#*W_UqpxEZK4=CcgTvGZu)UVLwu;J6ATd4;{KZI?6Sqi%TYEr~e9;1KqL6Z}#BT9s z$;t$r+oM#9hT8QEytxooXw=iO=e&#E4miN><3QUt7JB+nS(|joo}A>@@b9ja<4ZS1 zP^qd>q&{qq&D2q|4yF+;O8Q08X?b%-^j@q+VTm}SeRU>aqdy>w+7CYCrH>B!W$vP?jO`*&q zGxzT;KlqC;&MDX&sR*O9+Qz`8bMo_rR7tnw`d5oTZpm6KW`sKItrUMMXB!ZP0F_=F zusAe^dSHOAiuE1zLEk$NzbWT%4`i|n{YkNBX#b+v#lWtF+6;Heo+B?Zx@;?jRFHFT zIAjb_Nu;lY-%8^DRqXPmt#3I~NO@2Gq`CnV4~fZX&(^VpxlTPpy6>xVAELAj2;?Kh zpC)TLj(mnc{TJRm2lu}euE>e|wSRkYfY>_h6{*xVBR|m1!3jCl+qli#23BR9E_JD? z1YVVv zc7~Gy_v7QJv74(N4Bwv!hIZBiz(KyIm*K-ukNKm(b)wa6Z|Me^`+*GDJj6@af%V%I zIMgrYscT=nx82Wu^)tdEUxsjeJAy$ZWGXTogPw&yecWEv!>itCOc*s_$gKvITq(W< zgo2)#w1!^==Dfr8R=Eo>8z-tah?=r}|0}o*06$dE`jvms6 zj_yC)fad+paj)d31lf0e;DWe7Yg(pq|35a=o4~`s!9^ymZ)#iI&J?6UbQD9iHgPzA z;}A>wXZT0;lvlJDfG`O~dlf?c!5!)Q9f1B_usyLh+3#9WaQ|1WxEjG{9Hc>WJ^wLF zKJM6>6RoEhj+MmLqe?i(rEVeKu4@3mtytubW`Lm8X;4pg{zrU;$4G z1EckRTyXKr-f~{hXP_^T8RLvaJ~Qx<4=+t@;+{-7LY?&si~nzTOV7gpbhlXegLAdb z?5yZs;#7?cJ5CYC9e@jJ@5r>UHPcjgu+h;pOBXRuaKgo}+;~@Tze~=7lL*8TmE}i&m`?`G z{E>e9fAyYJjUVs>2;=IB&Fb1L<|@_@qb?NNXG2{4cB~mXehTn%ZITC;sS~^zEUsKN zZ}#Z&JeB+7OcBeE^XN6~#)t;q-BNsrFDik=XZFsYI#`7Co}l0vqjYwZx_J zsh}&QH@?u#tV%NoG##{bZXtQ~ayzSfO|AjDpwTJ+mI`$jUZ?mT_n8c;>I_kHA*w_NS| zzJslDhLG>s{p)LXrUo;YbMD#U^m%{e_x*pcfLJ)za6Oo0iypJatZ}pj--bT;IGt|# zuE$CUZ{r_EkBWhRXY@Fr{7g<$J7L4CtmkL0wZPuYR;E@25;2sQ%_??>S+|r#@`Iv)F!-WWuxJ{NkQ5<`?DqF$rJd zWt8B!8d2rsz`*DKh;supgwTBbl1Mh4qH1~CO-VDR`tqLf%Ne77fgfYCo0q+n;2#it zjU!_ZP(jMwnU3{T94h<#&@O>q>$}z!v?d;QD4t9iHJm9X+wG?%T_*8KHp=>rdHbj~JjG&n`DV1UO#)ILgker}wBE zUkH$*S^Pz|?kd(Dg6yk@F7kvi=yJC=k&#RQ7B3KcExp^Jlk$hp_pldGU%a12cV6gf zKCy~sIC(Y~cYHxjMX7X)xhr%)6jtN5%=*hSMDA4baw+_j42fazr-FdEBkJ zL9xh`70BOnX+=9?HLG*WEL98xy~_tC6htl!10!3EGDSyrKx0lBLwtxfzL!JM(cv>* 
z=SQjh3G(l$+8JoJK2iM0eIAx{wh;wE|bezzhdXXX%-lKJ=NpuP&E$vr0V7h#@%Cl!-0N`4Pc6=K1^LguuNSM(6i@FIR6-E zmfYps)N-8y$g6aRqk&>nxNcb1ubVyw{z^WC_*K;Kx*13`Q2T}JXN(vFYXoVYO^u7BGE>16T1_N$$TDU1v=)(_>x3^%H{(xvw5d_<4?i zX9-|TLqtXoSqr+TVS^sgb43WqxUo4Ndr`fuf+PIBg3S5xX-oMNzE?ZRSbn;hiYphKh%nEy2DI3U~s6u?CkoKbQdRHOOvAU=GzyI zetZ$co&#F2a9&wm-KlD#-)n z=qy@OWxxfbQ{~-7&9W=(_3~13*f+)G1)_??#qomUnk0OfDzX$?Tu+m3ZeCc|Oy>QO zX5vW22+RC`u95dH=&K~Yt85bfC}4DyYF)lH(l1FJ_#-8#oUPI>@HtWjaUWV4_sO)Y zd_^uq6_t;F$g7pp8*MN?9r_(cR*v;vv%JXgX^wka$d|!C+>vcAH z*KlP!{Szd>PkOJswbQp8tNHT2ndLkqkWS6GUXz5Gy{;6^reQbs<7yL|ZBZgXvrOK}c3B&5*>n4@LDtn>M{zCipJ~7@`_(gsU^t82}`g z7#NU}T(pLWcw9|3d0x$hcy1ogp3!)ouZdipw9+(PdHNNIp?;Q_3W$rlbf1K{d|Sa$ zUSs%e&hHK|#|;bd%b)UWILs^|Db|=Dqr4h7gVh1mA5CFXj>|oE@V+|De%_Jn zaUT8S>PSxy?ASH$O^}b@E639JUye)oXW$Mm7w0j^`cHgOWh0S zV2g;O(w9Ko?^!Ocz5?4u@jHj%(JI`;nt5q|r3kXqqyXR8booU8toCN_!%c6$CDcLA z0LqHA6RHUpWGo?x-!{wSv)FB9=LJuy$U-AFLH_xh>&Z&`;PyD zX0BwQQLdu_qNUc3h)*d#3(TM6-g)nXH1%&D9o*}#+amlyT#~LC20>$(oZ`O>KO9KZ z`Eb$Ijppe_{Ou6ED*~@3H)zEjfow4mVEv9WE)dPCq=9_EiMvkBZ`H{BIQ{M~2(&0b z%lXGWe^tffPTLX=niy21o&Sl5rjM<;BYhGXMxA$}w8~-oh>`Pq zZ%)Kqvw~*Mk>V6quc`m(>GIG16n+b7eG|6O=I)x44yF5L)SFNEhiW!3ch_-3TbKwc zo-@fKuWrt;>t%Ikp_u?LA6Pu_S!=$){w=)*5m7)n7SlH~~OHW;TdHO69VYAUg-v}_&UHsm+`vxYaY*FZ5yoq0nNbay7Y{!+11An29GV?BgjXBe+Lte@9m_a6HX8@WWBScx1qXx{48H$BU5JHR;$CY zCc^j?vC;iuG+M|C$GC#`!k_D9#nV$fJ^6w9_oT!*t{#TEP*tfImh~vG<5q7aKLU$QPPy#Dscrv zgZ(!?YqrRe2VRg{E2MK~f|NtLGc2v0Q{7`IIt>QHX9K)gZG2(<+beGH_3+x>s!vX3 zCkS;m<$U=0buelm_lFJzo0=n)MAU@gDB79ANnsPGcXl>F3Fd%x*{tJm;{=d&QJ9VD zqdQ%ylVP{MtLwCv{DW@D8NFE+c)bO~E|hux`li=;d?rVN3BoVUo(J@89_daoqk6_edXx_}1aLrwhj#VXrSAY4$)dE8U<8 zY2^Q+17x8?C|R;FvUYD_7R`Qb-c($wQHnqXEak?WGOAnjX!{i^#|sh?7$dU#;sp{I z(_*kF+z>{MIU|l`Tk`RR?GTRde#F0)ahKLfmr%^e26vFH4ur;ppo%aFstErD8oMYB zmOW0X@;n{V)L8k&LO*>59XT|SP*#=)7vR^()2Zo;t#^JKUZ@#f2lH##V8zD3jiFjJ_Z!l& znOqt2={qu9livyrY%1{)>W^))0f@7NTeW3X{OhX&Tm0kBsVe|b^w&<3LLP~)%Kp+u z7yNi}awVuip8tJH(CRkYec8bnVXRa;QON4T^EJnakLF0e%|cbGL$ys&&u!KE=c~Z) 
z#l|D?fjkeKW2=N!T>sU86J2*y5>fM^SsxxG%zmj{LJW1qIUx=6J^iJP$LTh@NJ3iEF1 z_-;&Am7HZiF21E@fBfV^OznI6cm~g>uV4ry#Fb&5p6SG?K0oWD_h6t+);~>O8A445 z56j}8AgY7C2NemvBwrmKgj}6YPkUZuZH-@@&Vl$aWOwUoAN+Jl2%-Mc^xSbkliDdm zQ#`$Cxu(y+HiWx1-kAdERVPb$=P^qFhKV{eqqJbcGZtljP7>*!1@_~5K7M@D`of@; z*N~OG$_xwYlwF1(??|_+z5oG`+@7wo>DPdBSTfGY zk7<^l=5hxCk-{Db%QQmkwwr+HW}qPPPEsZ3@G(P*gm{aV^rO9gO32u?Rn zny$8qJWo3G@~=LeABjwL^ziGDc#COyT-Muf9eB^AWzpaLP6lb161j7s^jt88@S^v; zy+5?Y-eDy&`ONeZ6!RiGk4)O8?k8<@vjv>+Dht1@&*xF`coK$9B&_9^vYY&7ugdeR znYX_8v8fy|qlFT`7zgcan}0gq&W;H-=q3nGw(zffym};5RjP!Ni7|Dlp1Kk*rxNQJ z*5>AGRF!kZqmtX$3rm`%Kh zHXiMEQGU}#zkn`AKRxXb%R5Pm1c3Hr_fn@O{MPSx#bmkW_kYSYDGFH-ddRSexR0f% zRqZD6bXwQwu&LD_5V1nLe+OI|4Ij%aTI^x{PPlxI7yS5(1$hiE^)dR!nYmndMl(q` zROZm*HN~3Yzlu@WJ!^>&tLkaFFx{;s!0nZ(3=d5W1bBGpa+tKqphH>WQR=-P!8*kL z{Jt78g0n3sPzi|(YsD_&4b1QB&~j!61&!#(AY-G}0)`RyVt5H=ehE^|l8P=eNq_uu ztGONmsDpen=c;IP+7l!A>8o0)0=$1RAhu5EGt~-S;CsO^7Yu74J0w76e9I(1mr zI~#;vQICM{^W=g?FKXtxiOts}pgnDp#uLu+JFxP%PW1^gy8AO^$-EfVS+{XiotwW|*AYpqb^&INtPL5F7r?)`!9Y(LzxqW>6fwXj z6D%>vHN0!HYX{c+KknHZGlhc7{cX*^h=8Ce@obSqZQc%f^l@*eQptfvSO^2oL;B>4 zslQ1GLZGdPrQN^6-P``*Hx#^+o#IB%L3gx;Y7~zT+pjcAah(nY<4y+pw@1uNExm8r zrj)oC<#TKcKi9NyLFBS!+uoPLBs0BoUHtck5!H#>_(0pvwqIWg4d9>=#IAKm zXB}?7=WritGJP0&{*w=E)ciklCs728g#SOn?RSdlUVzG5>wxtrrV-vR|3eW~q4DeC z+Js-7jp-!zq+4w)Y+kl;kEw9=k-!gxU94 ziMxOQ)_hA|9_q_`Yq>atI`352_Hk%)Bo>tIOlS3J*q~kj0>8fB9 zJtpd}mB4`UpaJ#PU!c|l6vhs)_khY4u+QF9w($?c`N`y719dcG9=bCSu>OOJcT6CW z00>f0Ai|o_+I3oXSl3^~1+q;Cv8lwo_v!zRqbn4u{a52wY1f~|tyZ{sQ6@#FrJ=(6 zwnArn@|WyJn&oMg0p@Q*jz1;HQ$vPRG~8<@`wiZrm%r??niZc5fS*Xk{;ixn(D}F4 zEQZjv$X{7=qxe6w=GXug8AV^9qQ4&WSFF6I@;!!dD;4eMGMguUh-*alMLR-?la~S?TAL-g=aeXpL@QW}Y<5qh6F#+H4NnP*K%3JcSWP-G|TraG`s4he#ZYXt{QcSa3_S z^wNQMNAFBW<%omn*yV$Pv#y$u&Y;d9>p@&@R_^yl8S8J$Y?JRo%^fCu4Ym$9+Bv%l zs|V=9Fnx&h{V^d_OY9(C2k{A*P1_7Dm#*~iWBayd-Y@qM;F!u15u`OuWfN@Zf6&my ztzKDi6l|V{A4-!U_NkNxL-kl)3i?JeXK71~oCUdBtfTSn4c3>;{-<4HK<@(2Ev}@) zrw2RWy6briL^vlK!eQK|wE*qygxe76$PrXSv0E#~78vj54V}=Scy$=~n!)T)cqqAa 
zl6Ah@M`W#0;rJt?g>V6SzZ4$E9qtmjc;!TL7z#F+s-GXYih$5(Dw}Vx7PFB7i(m*_ ztS2bNeynC=`L=~DbH=V0>K|&}qXQpx(c%>cO35K1{k(CGM2|Oar3I&9A{TOd3lXGQ zfT$)PGZ6X;!EWR)agsM`@r3OfkiL~u)iGk5^Rww_8j@e$YeEG~2@qItCU%n90rNys zV6-FhY@-0amW3|ot%S%$yzed1HBC{-hV1RX7oTx3Feq5o9~g)&vAzc6MZ90qqPA|fGG3k1 zv3%ID7Q_ext(c9bVN^LXOfmglw&h`H!Nxmo76JhxeXYc@I@^upXShdS?|pi7a1H41 z;EYrrDklT5Wk>r=G7#k_iAv1I3Jw?=F94X-@U5XZ_5Df~rre-@XO4`+wyyr<$=(tI z-o$s0DW7`n;zxFQvCJvbQq26J8&_E3w8q_Q&qi80g*Pn>%Ifi--R2Q$3OCOVJ)*t- zHFK$LbM{bb_x`nKxBnB*4}{%0hP+LOq#3Q8mET5R=?EM;iT!wW`vV^gh0${jWUzHj z5E^9NoVa~(@7gm&HOzTyio_p+`|LLK=lzrLq_0sGZf~C=J^(wD4&O^KGu_Qj^`lbLPh+`-W;SPXj8{8#z?KzC8+4yS?Ts zMX~^}P*hon$lBdZEq?)(94t%6RJ`FsF$$k233BwvzMEe-TP5kK)$E@6G7lkF2S-@o0hxUBA#4e|{ zkyW1J3Knc{)Z*lW1W%aT6B-@aBZ;#=s&Jc$=I;PA0RLT&Swn$weBkb(jo1xse31`#a!A_RO3e*m3FH433JH(Knt46#vcdt&k8K(u1RidaEqsSJb;DK8=}@9G~dZN;C;@6#b!$$49Nq4z4?@Ew_4UKO1tA+4gIJj z<{VZ5nhf+d0TfD2cZ3DlzH*ja>m>kMHv&z;R(-oaYIdlJ9AT@ySx{`M!(z zSpFa8-a4wPeqH~bfPxAvKqM3v-7QEhQo51uQV>D98!Q$G(w))`i;(W6lG5EsH`2{J z@p;bPXUA{vbIy3r`+Mi`KgVG9T-SYlKlgR(3P3CMk^>;Gx2sgqF1@XQXCAfD2P_u^ zK3BoR9^$jc?T_?vpb%lLb(c8E;L3WGDtDwS3dK!EY=<>t^jUw%i|*4XRiep3N%XfC z2}h3-Ke7zwg1rz&qv0OuE+K_KdcbX)k_((p%#n)(bN>y>=$TSSOd8-9tQj}LR~OUv zgI92$$;pQhNS91q!?EKXQB(mGK**FEaEc_S?y{gi=Ia~})IOYAInWe34Tm}Lmwo;6 zWypjR9}haBF!xO^0=ro>2-@#O?sA334*=D90==(`k%L*R>y{MiaD2JJXQ<6t>b3<} zUUE$i0*7zgMaPCol2ne48beE3I6DZ1vfZ1_?gK>ectdHz6JkdaOXX-r@?;T(o}TRa z!HGS;m9Ww3Dk3${NgqtFslmhKhz&FvB-o}jr~%bTdrN@U@43jRa!<9Rdd`|15V(icZEVT4yFJLQW=Inzi5!7MPbXj--a@ms~YQPt`7UCmuu;e)qmH%5GhJ8^!Eqm4{)t2K4@eDHx)=?px~mj$mu2v0A=A)dDC zx;fAspt>-y`W|#JN!D|mCi7{1Pac4$k7-2fFc5BV$7Y)q0J>A%?znZl==s1EPGnfV z;wqhOCJGPGcogDMrA+RD%8Xm(x<8<8wxbHom>T4v_F=Zvul z8DI8x@MQ0ZZ^uN-+)4dLe#p7m=6Y=K*-+qw5%$$*!28r@E3+I+AU{Px=nsuWEXu=Q zaj-cv|ENe@u%dyhv4A67kMgcYiGhWunY z=I?KL*=8MgWHYwkK;` zUqOFbK|6Y`NAYmpndy1%v{!lnrz)_o4Cmq9rEi^wC|^c-T={(X-Fy%oO-G1p#e_Y` zcMCAEK|c9up{z!(jM`i&x5p7SyA6VF*_Z%o%iZ}ol|bk?W|wOc3kG7#3#}tL`mtG) 
zwq)7MlFP`;@ohqfm6lZJS-OkX2p;gO=Ai@aNRVyF`{i4I&r`+#6)@bhaC-b^hA?9A zn{|Bv_zk=Agm#DtRL2Y0N1J9<4UB8WO(uJ@yC(l$Wf+^#Gd5rYx(uXQ#1;*qV+9`l}sfSv8$6<_r6nPM^` zR1k3l!@_)x{-x^Oe@l-in-OmpCs_<~KrUCvM|$lnc_sMaTVx3ZVacR~&LqEEgg5wW2n$#JTstmaS(!78VA&M(vv%&;?&LQA_bNT`?k!*ytOI}a@lI7= z#FaKV9EeHv!wAUnMx5HW_LkeaQwP^Bem+Y=coZJ1Wel`tKV&D5sG{&<2DLi-|4+2K zcVg!QgZ7iwuXl=}o-EfDZz;UU0u`>nC9LqtCPg#C_EUEG;5kCqPLU#tJ0#EGv9p(G zKsyyqVVuRVM1LB^o$I?$%>e+IIyZ~{wVNN9F8*}v*&j;2U~M%9L6%nX$T$Rl(%silNRB*{d`$(#Kb3D#4PKr&t~HR%ag}hC#uHW&{{cA z(7FfJ6bOq-eCVnR8u+OPU!jW-6J85gt-@?7#=g<-J6_96T@Z`u)|L4moY;d;T)yQ% zh`eKL31viLWd~gOpa3Y_ccWwtrJBpg*lJ$44~d`UBfOOKQpgX&ynWAJ5DcnIMyQ#i z0Ov(R7n#X4JNjsTj6s=ZLKqW8N_A7>y&VacTB>9@5h((O&9YVq((H65)LV4AG8+o^ zf*W=iu5hmby#7zw$OgG&#}FK*X!;*|C!39F@n#89l7j1qA?aBvKcif5937j(1eNA%Z zM9L~;A+^;d!)B~jM9NKWE-H94G{M5@WS}u^k|TKHUX{tqZ?QP|@mLQ|-M(NpKXo|9 z*lC+yv@fEqY&<_sz+{VPgd5t{ed}gcId#%w+ga%Ix{7)6iS!ebsXSBB;;AP+)*h$d zt%&9E?X0_H2(+>FCgkHL5q1f966}edXSC_EQ8|WGN7Fml)cW!HrO&kqCu2TUwdBh~ zw^T?Zi4;8imp^ufxLHgPSQJpvcRl%GAX+u~WNP|XHqtYmbRX4iq`iS3fXyIiiR{9Q zMvag0^*|kh>dwfNYSTr%_!{7woFW}AV-LeXncG%sWB~5B2bBXHCay%uV6dw4I11xf^8NrJbMhVgzNfJo8r$=!RJ>I_W%xMvX5YSve#sKF86_D zH$<3E$_4Fi=FF1_&9;s>SUnDKJJ|^ha4F}|haZZwTs*FARqmh#n#p>>QX?dQE}S1n zO}6nN3b6ScLVw^ zsSvwxN5HSnR9PLgcRX-T^`s;Vp5~%lTKshFUf%WZlw{XXdHd_$(+CR`chy30I&0q@egOW+6qYn z?Dy+-Viw?(w(6*L>)}*o6yW~m5GEB`S|u-EhJOFKFqmOPqjwOuj1nsg=$lGCgP*;^ z9LJCDg?O>L%amK@_lauq4g)`?&1GAswT0y=!5r=6jT>|4^a9H#w_ltGlD@{quK{zeL~>fDQTwQ@8x~7vGP}m1V$O+ zOq@$)i_&Z|lKBA?@Zg>)2h*WnqD@~Ie{0#x3TSa4fbZ78>gytZR4P(4m8D^HnE`%L zQ2o9MX6!nK&q#u<72lDJCHuV+=K;z}r+lK}5J)J)7v<|0Ie(|0FEz4*BqjE+5?;aEQ@Zi4nRel%?+fd;f7*5591g-fgFLT?Cr{K{1E?63~B4IPqWukGvO6ROEC~ z&DkwA#rT>kovKGHG0MkA5IWI?-4h`xipsThKyvqp$ypEH((4^80R2hLe{|;+{SH<)Ve-xk z^EM_}aC?q3pD3h9f4s3E#95`Ft1`1}dGXD5Ib|0DldJwbB;q?6a6-;s-+L_!6MWjp z%=UW=%)fIKx!vXZn!YB@S^H7C(Dj3k+7)o3!oWNWzh8Fm33mI=XQWVGe%67SlXg}V zbWvm5G5y_-kjW=o(~Y3s-2AKPoyM0<(y>O6-qa9V5ZNHp4fv|++r)#b5j3$`@*_H- 
zhR<%o4v{mRutH2FTBG3nI|Is-=f^!(CoN?~4X&noLVg!u*l5(H%(8Y1D0jHzgnXM? zjrrB#Tb*+FpjM3bw`fH7v#BzP`&E&X7fO#}tViok+-`duUdTUq8$j7}O=fgce=dF5 zx(B82La`E<6?HH-Qy8DUh@^oljVTIq9q%Yb*sL7u8-+&D*+qD#WpVE^9GskBCw*NK z>WWt(KtXL_rO64JbhFP`yGaxsnL*uf*kTWsfgbfazIe5iwevb%yn-<+suN8LEYQT9 zwF`!DKtuZ>Wt}2*0p?hO8L^VcW8ra4Z;*%1I(3i>QUfShlO6!=jJhnc_8y|~qHlbE z>8xfQ*34$MP6llo1nx9_h$fr}Mr-y2?cwam)8`Psi}_bjmkI}8yN^B%PH{)yM@+z% z@ldq=rYU53Z8)HL3v3SWZ^_PK?s`&Q>yWKj7kiTWz1NP4MD$h0jMwJekTQ=V*anM3 z2$iW^Wrlc*|4$s`RTmO}&k2rC;@6kvhf=F{v8x;k@TVdEs~r4Zktvt{^)7`6K3LFg zYImy!=KvalQd!&XnM|T=WR#)6Fc&o&uHQDx;20^7#A&geemWADu$6~!Jz&+|;e5xL zW-D#+8k}`)Fuh5sH#umjP3S__TnRIGU78SicW?UxBeBN%T_7{oj47+of*rUORhIEX zZtc~+K&onI!sdPaJ2#65pA2nV6{Ry|$W78`_s=i@B%v_Qshv8Z78fkpx* z(5}~QaEL<1q(5U2Rapk1R&)F)C(sJ6Z!hYWX6}8|TABqTq!MNBoo0fPt>`uExQz9r z$K0@SGi-#_s~8(QqwmSjAMncm?=-6wR4SY2QWhLu28AvQD$t&S4Oa;)%p`!_kFsTu z;I+jZ=c`#NiavF=_4?=H_Toly{?~kGycbthuX4OwJOLTw<@Z-(YqvSFL$N9118b)Y zab(Mm8Pr7#CFu_v-- zjGRfY#$iKv{n)wU1+q>IUX(K|gHK#FOym9QsIHu}7Z--v0E~`l@_KUD$hI#rV`tm5 zui2_)&d59(wg|aOB$fuPYwAPA_fPdgxE-G4hq2~WM6B#2bF_DXK68S3T`XODE5x|q z8q#)gRgVjGSnspi(uj!n{Xs@ddkwDV7tHQ}>KERTnfr0>PaFEXgQkR~@kdrik*TDD z`Ddacx78vIe>7QVmcZoTjOOjrXs)k!+8PgIH#j(F5TJPPcIZ#rm0SdmxkmmNue|<$ zT_H2YlvAl4M}qGFXxU2b{Z=?2Cn4XlB7!Qq;|kh+bDX}VB*3#d_RdevKFQ9xN7Czv z&!$r(ga-e~q@L#qF04NpFmZ*#KcW~14xN!uCIUqKSkiC}z|Vc}*^PC6O*^T@WctSS zpyhEHi44VIWmpAF>HE+;I0bn%lLSB{ewc?9l0Tmh?I-YAXRr#T_XiJcy3omTwB(aI zIv_LN@__*=Ojm*iIJo-gLDvIbNzB~`OEXblauOga#fDZ0)AvKF$cryCeZ0hF=%eEB zcfHZI&}`&E4J=Fj$KszOksb8pvV1D22^@v9hQ1v+g}*T-?n!cxjInTAkb_@3FKPnK zq&kVo;5IG(5@4%S#;9)pCyuoZ=H!72K0iPccEo%VsrJ}8w@YzJ9%zog10_^n`~1RG za-u^h#34PIAUr{y&Gh#u<(x7l5{pzNrO)&ts_?sorJk>%XEotxc~q#eGJiVQ@@_-J z!q+X^6F;*<9u?L=lw&J%3*J0~DlsU6i!rgP4Mk`BjF+lgH#*xy%hv8&TGg>P7~dmX zx%&7A8e1e#-5&fIa@bFJW3WR{boatcrzbsC%$|hTD$d<=$CNQn7qaw_clkPC;$MUe zjDxXIyMD{JBT(BSZFrnS5ME2{z-l0RUnf1XH$pNTMmju$j<^2*#@8x}H zNS_Y#LzVE}fq6w3QF#kA4>=$MRqBh}nOZ(y{e46Zoa8`4LSiavv&f>%syk`c3OewF 
zPyC7-Tp6^;LVpYMN>>*L03Q{myLODxU?`b%bnPv{jDe>!Vj7XmpOgY#K+*HTi79q* z+!mCw%)n2UKe&`M_aif)%}qo({<`kYxY>wVTXA_pFmN;M>7xEJT>gZ_#N!|_o2 z6uelu(q~CwT7mb)q7AbTmQOyZA{ahqTY=pS|C$8If#3xK)sMkXd(e+cEcO<}NoovrNzT1<>-{qZm$ zbOq&es@Gl~tX^3e;ftq&XU?c7cWsGjoiudUlZVQ?8Zkgt0xqC=I-oMF4rcT*zgWaV z6h#z1y4!u-x|4K*ZEoAh z|73HQmux0Dj%oZCZSKj4&V)#e+FpMaBwP3Hwt(CtPhie>aaZ`7er@;GX>s-MjV7OJ zOu3eQwkSDxt1w__U40}9nmbb^P`w?NhYZYD6GSxS% z&Jfgb5rZMjyXrULJSly>jKeK*s0_Z`8^PyQMonNiGKuLv*q#4s&Oygh!0j=H=#^as zL_oJjgio@`5T@YyVgn(O=kGyb`VV}{5|5&*YY)QWdlh!mT_1n^aX^gZ@Hm6F{=Put z`b%CzjxV=hE~n9TqAqsp^oiLZ08ja*YmP*YZin2>sgB=+>GsLH#ksQ!Obq7$EmmDP zUt!ewujxsmw_rw(bAYQHHUunSZ8g{z+wg8XnLT_t;#7Z(-Gn`J2f7WuIahdwfel8k zRaJ{U`A1clrpue7>m2R%HlmRY8Qt_;P8c#fEb&4B! zo#XNH>+QE>?1}!T9{7TmH#G>jcqcy}ebV`Gj6B&$b4u?r`cU`Iq&8?ahQH~${*|9x z_lHrwCb`hHf?`$R=%Q83kI7U`lx?h@M-PM<#^gnnWj6r0&pJbRn6ol|?0CsBzAlqj zpcuPEiY;6ph{8Cd0j=}bB(ZzlL#d4MJ35(BI<&x^R&XScs zijp)v5N#2NNKWPbi(P&?!Ef^v_x&}PQuu#^Z7H(n9!-}4>y(H8;gXM5C%jh4q_~rg zbRM2xa=i}tO|_s#KaqgH-A&D@a|i3fXHLXY{U5MoSk%Xy+{zV@!2;w`>j>j6H)riG zmYQ)B^2@OY0r9ruil>rSQPDwijExUG-bK*ay1#O`dhYrj;>cFhny+K18&0TWs_wmz z;)pLxnF@OIpK^|CVy#D1r=&yqp3S&9!ueviwWC?aG1tnF=82SfoS`RY{iJA)6v8E#0r6SmuGvsB# z+MqFg5EDR%l%`408x_)w`#ts^0g741_`59)eu%6etmIo$!mtY3{D=MqKKTMxBA*Nh zYzDhOvXj$sK4jSYWl7}sZgCrQ0P8vybm3Os$2;6a`ZV4+4Kpr5@mlG7YnA#lZE2?$ ztZk3@AIZZr-77sU-m+{0e+5ol_FjEI-1yV~#^HOa$@-%)g>KS`yQ4#Q2MIOZD-&+r zD5Z2J=0BO`fT_Q%0bO`1A14y2dA4b3r?zO&gb)5Fj<<)^2Gu`0-t|H;5cZylhSuX| zdOtXvh5@}a4sP zc9jH6=iNpcvSmeX8Zu~drk|{u{}LQzbL$ZaIa>{Dma51)^T3LiTwc)D8Wma#N4Ku$ zP?A~PO!a_?FSLdL;E*19d846^!yftt2@(&@lLB*WpqH#%mp3DR6{8Yf76I9UUqz~k zKD6Ln_JN>)firRyU~8j~@SYl7JcJ_rT}ytF-i+quY19=aLXABD>;1%x?g}FP z?{xRuh`VIgl%DB(<8Ml6ab!mt8i>^3fk%96tmAKdvM27+cX&{|i4Pc>Z_jOG6WiF$ zO>Swk$Y~Gou(E=3a<2_vOiG>Qxke?^G<_@;&7x=INssd*q`3^EwQ(Gia&lRqs);z&>RSc z3^<|zkF;wby$G#jslO=dcMuHb=v&oMa-v@& zcS1#2CV?}kWPcOY5ms1*oZ7`dpRF37z7f(688T7AY%3-J_(+AcQ-0qS(5ZU}P=HJl zE}_EkEvM?|rx(2*6@p4vUzA{9W0VflC;I2{vXbl&e3b2E@jc~N+OUj1#ynJ9V@mbiOX 
z)}1~R;$%{*r%{YYXfn$|huUf7TFL0Uk*XQDD?w1lp|!>Gjvb?!bYe8@b~j^IMtx3H zXBc?eC6LHB>6&yrDDFp`qMKk$k-^PXuSkrT&PfI1lN`V}Wp=`i$7H=|w2RM#aLfAW za*;lJft54vK(*$=YSpHa10i>Nfn07FnKktIk|<+{aqf+-6rCE6(0OOE0NlW=WI+jjA`>EnI?*A3 zRxY`6J~m$Z5kkFZ!HXUCj!Ua@C;!ECr8dme#NTyRWnc`g0Siu}`x zMSEUa3H{DY%U9#guzC@P<6o$!8xHU%^<>usV7Pug?%!J7+OtBhbN;i_z4RYWH^=6U z(>?f~@Q%Ma-F1wAINcau`_guBJR?&K#Yk&p*~dCpwiG>%Y9mK5LLk>N!{%358w;|)n5HZl`rf2B&FdZ~6j8qm1M_nGRVJ{@uP3H@hqS(DvCCbItV$?;6?UplTx`4S!k9HJG4ceZ<|q#9hq}S^L3nCI zH&*xQz}fc(h|mJCJEQX|a?P|c4voT>4{YKrnj%j^fF)ga0~p_&t`5GMAWALf9G9a- zjatR48l`n&=v0F}IGxO89k-lzeStiCsAR3pHM98-PWSLXKp79;!oC&|7KxK;c3z%r zZ(+8Wjitf93S^;UybS88$|u@e2tchZ$3hBO6DKzN@BX9D-B^Frn=pMNGigGLs{T=B zF0zXPwPje;Gt!;20ec4X)e`UGNY3{dI;BND;|4WHy`SHD zn%t9$_CN*J%;G^UIe`hF+@ScXu!)PFoTxIoxQod8GLFWrx6EO}4 z(LVM;p|0vmB;IrMF<|qZEGnux;Br zc7^cPe3K+qBuG=sE*^0gFRwh%QBFBw>t((3c6HZteRr?U7u1#%0gyLxk3inyq<6vE zW#>NhBRHw5HtfxpyG{4RNLA)}urhdNoV5{V^{_&-n5j)fsE^gXuiaIf*%ws4ot=_d z4~*_LJe7^C5`4n%TPh$1=a1Dl8NNxc4`SiWZ%h-kV7fbKqNHLWy-D)@bNx?^(_^=V z-}nV<*#2vLpUar;1}n_QgZNl>flO!2)N$T!PQ@E#I|0)_&}r16xYVbUot<|A?r4u7 z)#%(GqI7vwFBVg(;j9(3?(ti@`}Bz)U9N?ybowxJi&)Ti2#ju$bQ{o)J1iGRlCJK& zK+9%{h&dz`z0ybPD2PPKNw`{*8gM+2dKGSR7+|VJ*)i0HA`MscjWFH+q0MVmR%it@ zb9W_R_(?agnl3EHRB0~r`YZ@k3oNP;x*IR7*_EiSBrUv?Taw);6rT0h$u?2H>E!c1 zU`0Lo06O_|R5`2>vGY`(dd_k+_IAI0#a9!^eV17Qei{Wq=u=7ho3FFDd zZ`*cUaUK2}0Sjd`kM~#Ea87&~!~2i+HDvr>+t)D=*?p_yfm=FbD4&C!j@@EIA_N>Z z7jJOX(qUit3m;!HySIh93Eo1Am1~v}kzZ!u0_}@t-tR}LNv|b!JNAQiHgcVbGj|sF*dCdoZz_jSV_bu1M<)vr^q!GKh+b$U^OXPE(f35ij+sL@ z=9~`o@iMULhK$i~maJSaEef)NV)$V$X5HY*V30M+oR#DLkv6QJ@AdJ$Sv2L`F+!8k|A$hhH6Zb4WK%^@OXA z0GNqcBc{A=%=Gt8wV)try=sNd*c4V`c1_nB?aSz|v5>kAK6NZWK67OD&iJkBb`w5W zAu7-v3BM@mK8IEbB1U!eF&94!H0U_JGX zc)GKKjdA$2Y$Po)`fMNdJj0ea4cWHnY|sL>UBI_Ab4E;hkm?b(QU_`RE$W{SoS4^i zPM9w)9u^$>H@1XvSChd=y^4pnIH1|R!;D#$bUUb`pG$wuM@*_R9cDCAd4nIb+OZTi zPTZK4DCRPSa;jpdF(YZ4+dy~tm+_lT$P|5!3J2Yp4+E%{2eIWroBZRjCNuTS!c6fx z}nHcTbMat>VLGV5d1oV~ik;&he3<0+eoIA+yR9<1F5i zJ_(GozlEM-`wm9>@Ni7>6_o4?*9qU&gNG`i9N#Im0b 
zDVV9{K*4F%rvnW*y4qkd=daCzZ%t|)XMOui$(tC!Md$Z==YGnFs02vANxZ5~4%kN! zJ&}l34Z`c>n(7t2OtS>ZYds(yB_IbSBDrHPPbItO1OwE=ZLp_TK*(A`*qp36x}r~J z;LL&>dUX~E`sZu$4q9M}F+$B$LWs&am%in(dCQ3j{HGm)1xDnVJv*$V#YJLGT87mZ=4kYy8VF*fSr1td@~ml zbct1+y9Oko`9@4WfAlbRkK;}&h`1TG@E05#(P>=sQ5;Ob!8kJJSB3MllGbSgb9M*D zf868ncS}5cZ@%G+x?^{>rV4EOi-dv0xZfwCaJEYV%{;TZYtKg6K>gkNE-%3kqQ#PW ztrHs4*3mQ~3m+TEGgKFV38qKkLo_2nUmt3nqOwkn$Q6z!6HN`LlspYFb0ts$+$}2{&3E0;YHw&rb04SC=dX6Z; zJykX2*K$@7a*P6<1yd^$7>Up-i~36IRi01V|E%XRao~@hM^P;3*%g9!Z4C~(*hE%K z`ykQcIW4<^CJpz^T<7xjw;I4KkzUjaYmn_FV|h_J*fW(%;w>J0yrzs(0a4MHjK~S{ zG015tyA+!Vw>|{+z%+T4BkK4Uw>NUdyBQ8IF7LftdIqgzW>#dO;KO@7_ymF?8av6+ z?KUwu)E4PS2K~1M!1xDkaJJunuK`yQ(|@~? zcp>5bK~dT@MkQ`|`mr_f^2T2(i7TZ8|Ja&^0vnfw+iGCg#n-CYgVJ~Zwhz;H9?V$H zm)yXGM>~i#dY+5fJ#@*1{8Gwmc{85AU$p+=f-n6BS29%;c(HuV^V`+Nym7UcXG-cO z^&HGK&L?kkIv#;z$N%bT$BteRl%0b)E1^B>pFz|=Lj5aU!(-2zDZ6SOTk5^R)nL6b z?Qik*ROXaK<%av#UENi6i#Ot*Kg-yc|L9rJOz=eO76F?cr?2qGa=rwWNhhN z4!5#a9xP}Ve^)rBxw2WT??|X_$F9(ltEvo8G_a_Fy_udU6rXR_(d48PXTfHu5!2W> zu>N2>US1{n{v0&7KWQ%gX>Ql~2PLWH6UjfnK$ZGU-CQLptq{R~zK=$>H0MZXeNg;~ zEc{_9KJOW}dC?dTHfhqmJhHd#;!Qmu1eqJR|2xc0B|0~2ZP>34_mY3W!AC!5V=i9g zEH`{h-cx42)9S1E7aIJ=^B(&OE6hXzs*k&JaAH9026q<3z+D3&P1^8OdkdOH#NvUSobua`I1j@bnOIT>V^%PPOlMQCw7+dK(TwlT z%YAajef8HmVqmZ+(|n#CXa)4c!#m^T#vb{?5a@kW!;2x>MpC(m%2syxXw&EU1|OhL z!7d3RJK*e2Y`{D*F+MB%)6G!~B2k!rW%X`BC{t;XvXtgaAGBa=D-K@M&jKCUieq)eYPC zR;c=m9HdZ96kxeS{05!BinTJz3*#jn!hnMlzpPOglSbzG0OP0+%xk;kLhAlth42;i z{7p`e_p#gW+wml@+dWQh@5dd$xGDP4O=X+Dkbm|+s12|NbT3WKaJxd<{)uvsTeUOM@jnOYpxBuw{AZ9j>SWez$%%T;^_J_b_;R&bEyj$D0O` z4GL@r{sFM%)Raqk8rl#k&J$eEDtxdL=5=2l z{--Y51;0HF;fj(7SPDp)97Xt_o-%i)Bj+STc8fZ@w1N9_i`_ag%0&vLZ6k zEBf_a--=C27f{QKE!dH`BMQlBK}B!^_v4JnxT=X|)YPS)S?;s3I~(%#^X?dk{y zI0QzX%&|BLg-|;d6WC{7Lr^Nf(J1@Nphpivm;Wg#PC}^V-18@JQrLDcxncc@U+R^w zr|TKPxA-cY3)D|Wwuv@wubxF&r~Kj-$AwW5KVc|-YW;w&KSX}uqsqV|jtrIHu6JDv ztL!iP<8Dc`-QH-TP~rvIr}Ly46Ss2f#qPfW)3lC1y-~;)&s&z}nfh3BPAtxG9es zR>^NiT8z)XLjWhbw-mSX31>~~!E+w-?)94f@6}3T?2yAr?8+ch4{!9A_59|$vTitI 
zG_%k9e&Uidb4!IH$2$#j-E{J9-%*o0C$Y^`C3#bxkHMaeqM`1h1wl zJ10dbEDoJ5#0!kJe_Z1rLz_R?F?>l>rsShLyvlb{nz64|h|h>e%}g}oeAtNQ7yS!8(t<%~utLfZ-LGATO5U%+Y>kTZ_%IrrBXO#* zvFIMn>VUhucKlWb;@6&xDQS99=x1{M;i<&;>k83SZhC*BfZ){Y1Q!P`bNjb4^JTBL zTHEpWiYtFOdi3ws8Bo#9nd!VU>}3f>^P>)ulpqWlEY_OPBnoIREzBxoANtw1qWY1{ z6THEIVZ{4mp4&}_-l9Ko<)c;QvQPys*s|Q(<^{7-Ozwqw`jgsnd@H%7ic~}&EEjiI zre4|X{p3F8BRg0Y4B$>D;`u#x_KmsyS}69Z0~^_Kf?7_Yg3`tdMCNVpspL{YE=N^m zW}f{Fr1*KS7=pz52xwN`Cb6cg7QiSjK-D&6P?JPo2~<`lmGUiVGpSU?BmxY1C*0ah z@?ChT>^P}kkS!RG=@Hm+zAw@B>0XTI7WwXwRh7ctJn_`s|9S4WdA*X=JArZ%y z?{x;s>tC>I8xR0K55lqEIbx;oef@^Y3MrMy+wE0Oo!Y3uo)m!mdZ$V*sK}*dM-K7MF^KhwYgf*xvf^f;8aO*!f^&Vr=q4o2bP9c%!sg*eAaj zsfs(gsBT9Y8|-da*sj|NUlH-``-0~5K;nt%r-*>rGr|`V&oPXZn+W+Go4iG$e&N)* z_=UU;Aco^?&EZ_517Cx>O-b+fbA%vuo&$i_!OD3m3l}8Nzw%)aa^*3E90nnmSpAx& ziDxz+;UB+f7umwGxYx%EVWmFqUOW8JePvv8Wog~WYTT4UA=}l>jZU)a<>NwLXVURQ zVeQhQNUpAzPoXoZvOXueoQVtOMtW9!DDZLf0`VE`Ved+g@LT1JPvS4?b#*153@Irp zd({o?!!v(Q+Q#mKS(9!y#I~Gq3jTrOToCrm#KBOs$F>T%;Rm#j0b{O2k?~je`R^o- zUvy;c)=;VRTIwCHuUxUEj)zf{E$`I4@sE$Aq{8$S7915MZ|6y;Z}@#SKX)0kX6%|$ zO4ON}W)6%kh;29@{=}vm3{E444-U{?qWXyhy<`X82a<9HwLg6ySjaXsQh)2@Xt1e^ zJwcK%u$>pB_?j-^%N4!EcL0nT22JJ%c>tnO)VD4bFn*uel*c_QhvM>@9b5E$FzRlg zn-_$_z5dAK0t*PxDg2Udi1PsM^V&P~dEQ8pCpz#_zPfV!`RN0IZatY=!qDrzeXfRt zI0xj%X|TA*SiD1hi*Z~WDfo)_5_{Ok#k1`c-{LS@>`09)5oFwZO9)pzLHdTgr&N#_ z5Rp|fd>L~WetBjwyOhLTwx%%AzCzT&)&zEk2A-Cfvt8{Q+e{T6xi;5k{a8&Nto|;m3CH=nqdo%2g{J_(E6!Rd-ZTTI(qgf<&Go0!dJFRc2 z4$ZUQ&&$2QM~=U`X9;D`NU!<2*`wamO0zZM(uVRtyf?5Z@!_3ONWM&X!~r(M}7OvPY&N-KiPq~St@}FKCfw13g)Lh9;1*id_&$WV8(Iw$3hIhPgH|% z^us^!05Rileu59iwF-?F00>AzPTS^RhQ9;s>=m!Sr76#IZnZK2SI!#8drw2exrf2` zX52HvvA*th0ngxW<+v$_qEm1DLOEQMw74@;Q3tS-l?ZBrQk{y%wZE)hu;T&dg4@e| zE-QBSd?jLTlPr3Ue+F}#?8-LTwgQv-e%j#x#<8Zif{?zi)y&>^u~@f^mO)NhFB>E8 zqkc`>(Y?-yg8&>M(IuA)c2yVCk@}&WkS`x|ZRZ{G`8z9ZKC!&_HPFzzs;$`-+QR0E z=f#pDwKM$mmjFFglQg8y_17ZYL4#d=v_%jr0&@Fy1$)esNSe2zmQ1Rcc5g-fE{qh>q}( zp(2D3p0SVLu`#I)yu*wi&u&^Au_ofenazJKD(u_Fz7wq1SoIXSFb@xQ>J^-rczEI!y zAD=)6G%*6iXTMxYr=@N 
zYr@I)3$;yp>BDBDZ*q&%1`nfj0lC)R0)e}JU8L$!W z@>SI54|yn)rf6BFVFOo%L&Ovcc`6|M%%63HX7q7jz2c#z&Ale4C+`>fc=RedHt)xi zY<*z@cX6;^%Tnt#!xAo8X_U(Hn!d~Tejeb1t5XYzPI!Nx$A3POGsf>Ytv2R2INwO$ zbtOH|r9-yCChv2PLbMn7{ot-)d-IJyW2|quKN|<5WzdU>l<@YTz~a zY9GjDhSEX-+;TDD+Xy9*!ol-1Mu?}`uRA*ZYVWr+w4B*_8)-KHiFV%7cLZ=Z$vgu> zA@ELS8BOZs@SvE;tqs4-;z6a40xuUy^rJR*DE2g8Fu;jvrY;=D#L06Ppffk^-k^s3 zmcDpb>+S#_@tkY*6z>G-uuVm2VgE}vxNDF)@WRaKzF)^=Qz!EnR(!Cmkk(OWmdq?&f?4Hne^0!OnS7txNwzVbJRPQWIV z`VenSSmwJOO)2AfvfK%d0@d)xZV=Ph@4qmlb&MN zQuK9c|3+CHuqlkeg3tfu%5AxayX?6;x&7)O_>Lb^kuX8pl)57Yo9eohLWvx|c6UEM zvmWE4Eqii$(UBvOi5h@=>b6`H8T3wH&RxuOe^KAnj%ADnKl^4rwVByBRL6AF?Sptn zI+48O2}OdtB5octiEnI?M+O_N5AJjSDK7z03A@CFN2aT{RGbGQ$K}?XbqTJ}7mTZec;gOm^&i@D6h3mW54aO<@R*}` zq-j5c=`4Ze{9y1W$5~WBoMh(i3g8P}#aX)qrELng?tY=n?XaYrFTxN~xVeW=CU5bI z!>$WJq`CUZnGtwyIQ*y%O(b)rd}N1#s(#10)T{8Atl%4EbW6Jr)pNE)fG* zbPY&?@`M?snQkwBq+_2+&zC1dtBFX$0g%|%XP{azFT#4#O@xm3@Olu{dnAe2kb=|a z>h(m&kECyRsXlb2f5D_yZm*l*>0OgV?zjZT`Nf)HdkB>aLVyq!LOdNlr8+|kJp&4= zwz8PcMnGV`ZseHZxkwj&I99_!yu+#iUVOA**X58&Xa3hv)w3-rl2my{kImcQ$+*eh zpP77cF>;I&D9)k*5=-|sck7~lLgD*S9{!A?wbAqFY8Olt+*78ULIP38PaJx{IjHch z*e9%AcYywehbY8q$L2bS5yyu#D?jZ^_?)jKHGG$yv(kMQxMk1VrE)^;xXhLJ{Gt)KO$&yl`x`V$db1nrnQpl68kwW$X zk2_Z=8>Y|1JsN)YRqGlhK5Jp2FgC01Fy)MEBteHKaFtdeo(3Z4Jbyjwu^M=5(Qhl{ zSK=u|0xCOi%-FvvcUvDgd@;Z+cH!N_TTLY=BS$ci7mIL>W*&*0?;nc^ycYM@N zE;X|(#=2uumvTfy|DW=%G@Pw%4JYNssa8~#Dk-fFh@m>ChE{88qcxMBgpv-1gh;fa zXob^~R%x|0RneGAL=sBGlyZ;Kp0t|qRAiDoYi0n@~ zsGHZ3Zx8UQ2bGBZ|HH0F9!r!9(x2_L($P^-YBjCAZ1hy_-PQ7bdtZ&Dmg;H*>X(E^ zkFM?69*5IF8$As)FzD#$IJIhGW@dKafbo~Bljf$0cE*a_xIM<(cRjG#`y&2uNBMJp z+zo&KqQ|rT8()lIt1gsx9KJi2+o3TY8f<97VltV=N(VR_Xl@pInT+jq`Q8zk~; z${{^ld-QK3afkV%a28?Rx_IT2&RTmk^Xq8BHSO(jJvG?M))N~0gh3r=`N~Ol>5Qs1 z<-M%8Y2#Fzz2wl0H;pF{)lb`6J!lYA22plM*qlOb zLG&K#jb7HMUfQydtbc#l^oX8Oa2dqO{FUl1eWRG)puKvGlqhb}F0s#7G(Aa0+u! 
zF&6;~++1~JQam(?`@N5HV{Ht(y~{_=gqye(M$M)~m=~wHrmPs(L){~}*;Z~&({N`r zfRWOW+p=CSxv7C@_qE@L_qt5H!(N>E(|4Lb1wD=2?6jUSHP z?L65@rx%6wEex>IiYUz-(A?U0vq!tAHo2TXI7WkExDbOL57*3IuRcb`!(N^Zkv729G!zpuN?z@)?&Nco@pMMY$RP?baa-2 zoNJr?0|9v=@}^6c%uo8=kzsP8xkZU)`*;36w|9vBirEM+t^Lq_DJxTDOY_C9FQtVS zWKrU@iWB1Lhw2VX+H*Z0EN|*kjzfDPkDH#+G-K^(&7G|25_-cGR|WU96af9W$%%kK zIBC<#$f}w|SBWkD@aukk0|SFV9BVEhFpNzZk3?NUiPD~J07=U&0KloK;WreW?oS)@ z3+^y)l^&P2Iw^=SEr1?3V_icBk8knF^=}R0=&M-?9*U6k`jf#RUrgo1bB7_n!aP<$ zwyyYt8&b07;_xm*brMOx`|0Y+# zT$z&clZ8vDdeKuf1PA#FFR>9pbMjfymLZ%#rutpP-A{Rtmc1R{w5XjanUcUP+2FnX zgWoT&eo4SWV@wM@cj6>TpcvkYK(rI=dw)P6Ba#A?UPFp0wL4f^=!#nZ3M8Xj@ZG^l zX4hty?o!fF3%T_xGkEs2h*FMbGHqx5lDctBdCk(ECCA2^X70E-f1v4j4c7>#_@*uq z(Ca5nhDhE_#e^8Sy87i$|0f30Ty-i=tasGU2vhJyS?;zqz3BAIKbvudM?jxZQ~xF> zzvm}4>0S(c;|ccmWB>vbocr$Z#*W0m8B$LbBIoU_AEq5K1JNuEYfO1uJAJ&_s-ioR zp`~BKuA7t}MoSSv#+$N2Fn1%-c#!*XpE>=Wc;S~{Zi!3lBGe|LGfC$ct6PR1A$h|U z0aFaN4kY;*s|80w;FLiYMP1Q@^{y^Hn;LP2{6Pm7rvu>2Ik{u6Ej}WGAcE2!Gg=3Z zgJz;Q700G88NWiu43in%wT0&AVPr7J6Mqwf;dz*=l$N7k#`*}Do(w=`^P91F73aI} zr-Ys8g4OqeAo-Hg9zR;eA2?1Gc~clLOz)3F)AdtzuD_%sjjS@FPI%{bsrgb}%|W2K z?2I){69cIxaI1}Ll=eaB?z3KU1T$EVE=@E<3_6^*5KL_vA|hhp)O<*BI06Q>my6M| z#CSslLZKGA1nf2atCxa%RB)G|gCrU%mPaSV;63NCoSc&}48bsx_Hk$e1k?wm=4($V z);Ih;XB2qYACy4HF+63(zSi|HKzj8+prTa4FGeo7vrY}F$X169Ufo~ZRc)_9#Uu>X zjuH{L7)|5CENJ*1k5e#5+CPCt!FsNW5x4Nd;f-WGE3*VIvf}jGLBqd*iM5^Wb^b@4 znZ~?HZ5Fd+16swv7NRAKblaIuSuuIJ92grNYXfW_f&BSmC-K;ZmR?;43c1}_i zK~x*YP^XH*D?Qmc-Z?|8I_d&q+imKGrP853M6OO$1k}2g3^ITG3I^EG$Au{~44s&y zNcpDHf_;hNPGh)LAdko(um^^F`kP$U%F|>Wb zY_Q8LtcE&G2-<+nHMMWa6Qrc3taF?{7TPX&-(LG`;d~~8+~5Z;GjfLII8S-C^?XXH z>2(^iAi@N4xXew*sy5sqxZ01)l~brZzS}BQbVbRFE0mCByLS>9{&k=9rO_`>&{pC_ zgb+}PWC?Fo(fbkE7@=y#qqJe%+9P@iKyJrnqSDga!_bH4&Opm+#xIO!Y?WTxPS%&| zf_zalM14V*4XANQa>%g>2op?F5i_6?sjqy(+}Ds0YtnNOfh(Scl=SBYaD|w5d)NRO zd@^!?wFPLEP_i`b=wBhavbj>oh^d~Awv?d2ML>aGx#Bm%=~$@oT?}T<6#7tGi0I#k z3~YO8W2Y}g`R2Hqg9ne_q#-Pd+^?Jf-Z}SwxTccxl^p%9e~!vP7%5M{+e;fPK$R)! 
zIv~xE^z?KTxEOw@0ZJ>ZudnB*H!^c1Ciz{{-Y`z1bbfw59ylbiz8>7z3ZyZ%8xJIn z_miyUEEo}1{5%-blBqeqqm4*v9UUFzGGZS$Mtw%?kK77G!QiRixlf?uj$2Xiy=v!= zFz#x(oymN)t&IGD0+)9C*XR|&r8{E&vmS4qr#@_oiaiD@963I~L(*0UcoCabn9%rDllWu~m@926A`ysjAc8VpkLSB0)pC?HsJG9}!BUkQ ztI;6`+%3AU*BB+kX&+ZtJYj%$GOVMAbTXN>3`^_8VckK`w)k|HsnyeA&1g0-I;Ewb zXS>7&RW-n6_uB&w85-h=;NCk?X?^B6a5ZkKa)`hMn{LVotuQkE+hGNTwx_ZKlnsh> zkY5A~`6;eL_UncFx0e}6fnTIB^m{QfJ=j55XY~9*3H4TC%3rOipqO(FdTafp>Lu`S znM(1(u`H{bz+EFKl^DSanf4kGTuP5rjHz0}RnIO3T~1)|28wxyp${0iC)PKBjD6Xe zXb^|*X?(M&GA%XmtZ-0RQ99!&lP;Ox=#~1u#dvIOoG3NC2ABv4DMR>vYS(&1pnVBS z_A~D7THMNqr?Ltj`sUJj%TK|Nv(a;93#&3Rn@*q1LCGBPo!LI(DYDyiF#9!+PN&cG zMv=pA9pkO9$>th-QL@!Q*~ zK{i%3&Jkv(%};>U4;jB#R#+*@kDjbweV<#r8-%OFgAsFhxv23&8)o!j(O7&J+?prlm z&%OavVeOD$jQ<|r_NMUi&u^xD56pt>{kcm_!^foaJKzx-&r@C=y?oCeIBrk4*Y}p1 z4^Jk35?hCgI1s0wGHg12XWmyS=gw(8B8*_jjDQW7#oxS<=X~{o!^hnPlcAmj*USU$ z2?!L)@+Bm@Nh?lUPajn6p#U7=vSRYQJLn za|9Iiy%tHSk69BVdNoI@`&t_+H08(Lba=+q*r%w+Xbet+LFUQNg0lVyY3wd#b zeHtzat??38sZnXc0QpFNL@9Z2j@)8Fn`Joe8dv-^OHWnhzd^^gMarHC<3IfbwRs=_ N7i=AESm%6y`xmc$!|MP5 diff --git a/docs/source/backend-delegates-xnnpack-reference.md b/docs/source/backend-delegates-xnnpack-reference.md index 8fe346680d4..d38c5af60fa 100644 --- a/docs/source/backend-delegates-xnnpack-reference.md +++ b/docs/source/backend-delegates-xnnpack-reference.md @@ -70,7 +70,7 @@ Since weight packing creates an extra copy of the weights inside XNNPACK, We fre When executing the XNNPACK subgraphs, we prepare the tensor inputs and outputs and feed them to the XNNPACK runtime graph. After executing the runtime graph, the output pointers are filled with the computed tensors. #### **Profiling** -We have enabled basic profiling for the XNNPACK delegate that can be enabled with the compiler flag `-DEXECUTORCH_ENABLE_EVENT_TRACER` (add `-DENABLE_XNNPACK_PROFILING` for additional details). With ExecuTorch's Developer Tools integration, you can also now use the Developer Tools to profile the model. 
You can follow the steps in [Using the ExecuTorch Developer Tools to Profile a Model](https://pytorch.org/executorch/main/tutorials/devtools-integration-tutorial) on how to profile ExecuTorch models and use Developer Tools' Inspector API to view XNNPACK's internal profiling information. An example implementation is available in the `xnn_executor_runner` (see [tutorial here](tutorial-xnnpack-delegate-lowering.md#profiling)). +We have enabled basic profiling for the XNNPACK delegate that can be enabled with the compiler flag `-DEXECUTORCH_ENABLE_EVENT_TRACER` (add `-DENABLE_XNNPACK_PROFILING` for additional details). With ExecuTorch's Developer Tools integration, you can also now use the Developer Tools to profile the model. You can follow the steps in [Using the ExecuTorch Developer Tools to Profile a Model](https://pytorch.org/executorch/main/tutorials/devtools-integration-tutorial) on how to profile ExecuTorch models and use Developer Tools' Inspector API to view XNNPACK's internal profiling information. An example implementation is available in the `executor_runner` (see [tutorial here](tutorial-xnnpack-delegate-lowering.md#profiling)). [comment]: <> (TODO: Refactor quantizer to a more official quantization doc) diff --git a/docs/source/backends-mps.md b/docs/source/backends-mps.md index 5be3dc72403..0d86c8e5c64 100644 --- a/docs/source/backends-mps.md +++ b/docs/source/backends-mps.md @@ -42,12 +42,6 @@ In order to be able to successfully build and run a model using the MPS backend ***Step 1.*** Please finish tutorial [Getting Started](getting-started.md). -***Step 2.*** Install dependencies needed to lower MPS delegate: - - ```bash - ./backends/apple/mps/install_requirements.sh - ``` - ## Build ### AOT (Ahead-of-time) Components @@ -97,7 +91,7 @@ I 00:00:00.122615 executorch:mps_executor_runner.mm:501] Model verified successf ### [Optional] Run the generated model directly using pybind 1. 
Make sure `pybind` MPS support was installed: ```bash -./install_executorch.sh --pybind mps +CMAKE_ARGS="-DEXECUTORCH_BUILD_MPS=ON" ./install_executorch.sh ``` 2. Run the `mps_example` script to trace the model and run it directly from python: ```bash diff --git a/docs/source/tutorial-xnnpack-delegate-lowering.md b/docs/source/tutorial-xnnpack-delegate-lowering.md index 12793533766..bbda61aadd8 100644 --- a/docs/source/tutorial-xnnpack-delegate-lowering.md +++ b/docs/source/tutorial-xnnpack-delegate-lowering.md @@ -141,7 +141,7 @@ Note in the example above, The generated model file will be named `[model_name]_xnnpack_[qs8/fp32].pte` depending on the arguments supplied. ## Running the XNNPACK Model with CMake -After exporting the XNNPACK Delegated model, we can now try running it with example inputs using CMake. We can build and use the xnn_executor_runner, which is a sample wrapper for the ExecuTorch Runtime and XNNPACK Backend. We first begin by configuring the CMake build like such: +After exporting the XNNPACK Delegated model, we can now try running it with example inputs using CMake. We can build and use the executor_runner, which is a sample wrapper for the ExecuTorch Runtime. The XNNPACK Backend is enabled via the compilation flag `-DEXECUTORCH_BUILD_XNNPACK=ON`. 
We first begin by configuring the CMake build like such: ```bash # cd to the root of executorch repo cd executorch @@ -168,15 +168,15 @@ Then you can build the runtime componenets with cmake --build cmake-out -j9 --target install --config Release ``` -Now you should be able to find the executable built at `./cmake-out/backends/xnnpack/xnn_executor_runner` you can run the executable with the model you generated as such +Now you should be able to find the executable built at `./cmake-out/executor_runner` you can run the executable with the model you generated as such ```bash -./cmake-out/backends/xnnpack/xnn_executor_runner --model_path=./mv2_xnnpack_fp32.pte +./cmake-out/executor_runner --model_path=./mv2_xnnpack_fp32.pte # or to run the quantized variant -./cmake-out/backends/xnnpack/xnn_executor_runner --model_path=./mv2_xnnpack_q8.pte +./cmake-out/executor_runner --model_path=./mv2_xnnpack_q8.pte ``` ## Building and Linking with the XNNPACK Backend You can build the XNNPACK backend [CMake target](https://github.com/pytorch/executorch/blob/main/backends/xnnpack/CMakeLists.txt#L83), and link it with your application binary such as an Android or iOS application. For more information on this you may take a look at this [resource](using-executorch-android.md) next. ## Profiling -To enable profiling in the `xnn_executor_runner` pass the flags `-DEXECUTORCH_ENABLE_EVENT_TRACER=ON` and `-DEXECUTORCH_BUILD_DEVTOOLS=ON` to the build command (add `-DENABLE_XNNPACK_PROFILING=ON` for additional details). This will enable ETDump generation when running the inference and enables command line flags for profiling (see `xnn_executor_runner --help` for details). +To enable profiling in the `executor_runner` pass the flags `-DEXECUTORCH_ENABLE_EVENT_TRACER=ON` and `-DEXECUTORCH_BUILD_DEVTOOLS=ON` to the build command (add `-DENABLE_XNNPACK_PROFILING=ON` for additional details). 
This will enable ETDump generation when running the inference and enables command line flags for profiling (see `executor_runner --help` for details). diff --git a/docs/source/using-executorch-building-from-source.md b/docs/source/using-executorch-building-from-source.md index af8ebfe6387..a9777425bc7 100644 --- a/docs/source/using-executorch-building-from-source.md +++ b/docs/source/using-executorch-building-from-source.md @@ -64,25 +64,15 @@ Or alternatively, [install conda on your machine](https://conda.io/projects/cond ./install_executorch.sh ``` - Use the [`--pybind` flag](https://github.com/pytorch/executorch/blob/main/install_executorch.sh#L26-L29) to install with pybindings and dependencies for other backends. - ```bash - ./install_executorch.sh --pybind - - # Example: pybindings with CoreML *only* - ./install_executorch.sh --pybind coreml - - # Example: pybinds with CoreML *and* XNNPACK - ./install_executorch.sh --pybind coreml xnnpack - ``` + Not all backends are built into the pip wheel by default. You can link these missing/experimental backends by turning on the corresponding cmake flag. For example, to include the MPS backend: - By default, `./install_executorch.sh` command installs pybindings for XNNPACK. To disable any pybindings altogether: - ```bash - ./install_executorch.sh --pybind off - ``` + ```bash + CMAKE_ARGS="-DEXECUTORCH_BUILD_MPS=ON" ./install_executorch.sh + ``` For development mode, run the command with `--editable`, which allows us to modify Python source code and see changes reflected immediately. ```bash - ./install_executorch.sh --editable [--pybind xnnpack] + ./install_executorch.sh --editable # Or you can directly do the following if dependencies are already installed # either via a previous invocation of `./install_executorch.sh` or by explicitly installing requirements via `./install_requirements.sh` first. 
diff --git a/docs/source/using-executorch-ios.md b/docs/source/using-executorch-ios.md index 7238a62af79..508669112f1 100644 --- a/docs/source/using-executorch-ios.md +++ b/docs/source/using-executorch-ios.md @@ -11,8 +11,7 @@ The ExecuTorch Runtime for iOS and macOS is distributed as a collection of prebu * `backend_mps` - MPS backend * `backend_xnnpack` - XNNPACK backend * `kernels_custom` - Custom kernels for LLMs -* `kernels_optimized` - Optimized kernels -* `kernels_portable` - Portable kernels (naive implementation used as a reference) +* `kernels_optimized` - Accelerated generic CPU kernels * `kernels_quantized` - Quantized kernels Link your binary with the ExecuTorch runtime and any backends or kernels used by the exported ML model. It is recommended to link the core runtime to the components that use ExecuTorch directly, and link kernels and backends against the main app target. @@ -51,7 +50,7 @@ let package = Package( name: "YourPackageName", platforms: [ .iOS(.v17), - .macOS(.v10_15), + .macOS(.v12), ], products: [ .library(name: "YourPackageName", targets: ["YourTargetName"]), @@ -66,7 +65,7 @@ let package = Package( dependencies: [ .product(name: "executorch", package: "executorch"), .product(name: "backend_xnnpack", package: "executorch"), - .product(name: "kernels_portable", package: "executorch"), + .product(name: "kernels_optimized", package: "executorch"), // Add other backends and kernels as needed. ]), ] @@ -113,9 +112,6 @@ python3 -m venv .venv && source .venv/bin/activate && pip install --upgrade pip # CoreML-only requirements: ./backends/apple/coreml/scripts/install_requirements.sh - -# MPS-only requirements: -./backends/apple/mps/install_requirements.sh ``` 5. 
Install [CMake](https://cmake.org): diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index c663b150fd8..5449ced09b9 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -668,12 +668,12 @@ def save_bpte_program(exec_prog, original_model: torch.nn.Module, output_name: s ) # Generate BundledProgram + output_dir = os.path.dirname(output_name) + os.makedirs(output_dir, exist_ok=True) save_bundled_program(exec_prog, method_test_suites, output_name) -def quantize_model( - exported_program, args, model: torch.nn.Module, example_inputs, compile_spec -): +def quantize_model(args, model: torch.nn.Module, example_inputs, compile_spec): model_int8 = quantize( model, args.model_name, @@ -705,7 +705,7 @@ def to_edge_TOSA_delegate( model_int8 = None if args.quantize: model_int8, exported_program = quantize_model( - exported_program, args, model, example_inputs, compile_spec + args, model, example_inputs, compile_spec ) model = model_int8 @@ -741,7 +741,7 @@ def to_edge_no_delegate(exported_program, args, model: torch.nn.Module, example_ args.memory_mode, ) model, exported_program = quantize_model( - exported_program, args, model, example_inputs, compile_spec + args, model, example_inputs, compile_spec ) model_int8 = model diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj index dcd8d5c6dff..ddf7f32f043 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj @@ -547,7 +547,7 @@ ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. 
Install it to Applications/ folder and run `sudo /Applications/CMake.app/Contents/bin/cmake-gui --install` to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"10.15\"\nfi\n\ncmake_build() {\n local src_dir=$1\n local target=$2\n shift 2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\" --target \"$target\"\n if [[ \"$target\" == \"install\" ]]; then\n cmake --install . 
--prefix \"$CMAKE_DIR\"\n fi\n}\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/pcre2\" \"install\" \\\n -DPCRE2_BUILD_PCRE2_8=ON \\\n -DPCRE2_BUILD_PCRE2_16=OFF \\\n -DPCRE2_BUILD_PCRE2_32=OFF \\\n -DPCRE2_BUILD_TESTS=OFF \\\n -DPCRE2_BUILD_PCRE2GREP=OFF \\\n -DPCRE2_BUILD_PCRE2TEST=OFF \\\n -DPCRE2_BUILD_PCRE2GPERF=OFF \\\n -DPCRE2_BUILD_DOCS=OFF \\\n -DPCRE2_BUILD_LIBPCRE2_PDB=OFF\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n -DSPM_ENABLE_SHARED=OFF\n \ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode\" \"install\"\n \n# Include the single header for json.\nmkdir -p \"$CMAKE_DIR/include/nlohmann\"\ncp \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/json/single_include/nlohmann/json.hpp\" \"$CMAKE_DIR/include/nlohmann/json.hpp\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n"; + shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. 
Install it to Applications/ folder and run `sudo /Applications/CMake.app/Contents/bin/cmake-gui --install` to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"12.0\"\nfi\n\ncmake_build() {\n local src_dir=$1\n local target=$2\n shift 2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\" --target \"$target\"\n if [[ \"$target\" == \"install\" ]]; then\n cmake --install . 
--prefix \"$CMAKE_DIR\"\n fi\n}\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/pcre2\" \"install\" \\\n -DPCRE2_BUILD_PCRE2_8=ON \\\n -DPCRE2_BUILD_PCRE2_16=OFF \\\n -DPCRE2_BUILD_PCRE2_32=OFF \\\n -DPCRE2_BUILD_TESTS=OFF \\\n -DPCRE2_BUILD_PCRE2GREP=OFF \\\n -DPCRE2_BUILD_PCRE2TEST=OFF \\\n -DPCRE2_BUILD_PCRE2GPERF=OFF \\\n -DPCRE2_BUILD_DOCS=OFF \\\n -DPCRE2_BUILD_LIBPCRE2_PDB=OFF\n\ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n -DSPM_ENABLE_SHARED=OFF\n \ncmake_build \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode\" \"install\"\n \n# Include the single header for json.\nmkdir -p \"$CMAKE_DIR/include/nlohmann\"\ncp \"$SRCROOT/../../../../extension/llm/tokenizers/third-party/json/single_include/nlohmann/json.hpp\" \"$CMAKE_DIR/include/nlohmann/json.hpp\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n"; }; /* End PBXShellScriptBuildPhase section */ diff --git a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md index b16f27410af..47352607bca 100644 --- a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md +++ b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md @@ -36,7 +36,6 @@ Install dependencies ``` ./install_executorch.sh -./backends/apple/mps/install_requirements.sh ``` ## Prepare Models diff --git a/examples/demo-apps/react-native/rnllama/README.md b/examples/demo-apps/react-native/rnllama/README.md index f017c8bfa22..7729f7a153a 100644 --- a/examples/demo-apps/react-native/rnllama/README.md +++ 
b/examples/demo-apps/react-native/rnllama/README.md @@ -26,7 +26,7 @@ A React Native mobile application for running LLaMA language models using ExecuT 3. Pull submodules: `git submodule sync && git submodule update --init` -4. Install dependencies: `./install_executorch.sh --pybind xnnpack && ./examples/models/llama/install_requirements.sh` +4. Install dependencies: `./install_executorch.sh && ./examples/models/llama/install_requirements.sh` 5. Follow the instructions in the [README](https://github.com/pytorch/executorch/blob/main/examples/models/llama/README.md#option-a-download-and-export-llama32-1b3b-model) to export a model as `.pte` diff --git a/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj b/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj index 5d9d01cfff7..1a56daafaea 100644 --- a/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj +++ b/examples/demo-apps/react-native/rnllama/ios/rnllama.xcodeproj/project.pbxproj @@ -557,7 +557,7 @@ ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"cmake not found, please install cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. 
Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"10.15\"\nfi\n\ncmake_build() {\n local src_dir=$1\n shift\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$PROJECT_DIR/../../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\"\n cmake --install . --prefix \"$CMAKE_DIR\"\n}\n\ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/abseil-cpp\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n \ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/re2\" \\\n -DCMAKE_PREFIX_PATH=\"$CMAKE_DIR/lib/cmake/absl\"\n \ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/sentencepiece\" \\\n -DSPM_ENABLE_SHARED=OFF\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n\n\n\n"; + shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"cmake not found, please install cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. 
Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"12.0\"\nfi\n\ncmake_build() {\n local src_dir=$1\n shift\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$PROJECT_DIR/../../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\"\n cmake --install . 
--prefix \"$CMAKE_DIR\"\n}\n\ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/abseil-cpp\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n \ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/re2\" \\\n -DCMAKE_PREFIX_PATH=\"$CMAKE_DIR/lib/cmake/absl\"\n \ncmake_build \"$PROJECT_DIR/../../../../../extension/llm/third-party/sentencepiece\" \\\n -DSPM_ENABLE_SHARED=OFF\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n\n\n\n"; }; F7CCCCE770493310D0125117 /* [Expo] Configure project */ = { isa = PBXShellScriptBuildPhase; diff --git a/examples/models/llama/README.md b/examples/models/llama/README.md index 041c7bb1d97..52d7baeabbf 100644 --- a/examples/models/llama/README.md +++ b/examples/models/llama/README.md @@ -148,7 +148,7 @@ Llama 3 8B performance was measured on the Samsung Galaxy S22, S24, and OnePlus ## Step 1: Setup > :warning: **double check your python environment**: make sure `conda activate ` is run before all the bash and python scripts. -1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh --pybind xnnpack` +1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh` 2. Run `examples/models/llama/install_requirements.sh` to install a few dependencies. 
@@ -164,7 +164,7 @@ Llama 3 8B performance was measured on the Samsung Galaxy S22, S24, and OnePlus ``` # No quantization # Set these paths to point to the downloaded files -LLAMA_CHECKPOINT=path/to/checkpoint.pth +LLAMA_CHECKPOINT=path/to/consolidated.00.pth LLAMA_PARAMS=path/to/params.json python -m examples.models.llama.export_llama \ @@ -186,7 +186,7 @@ For convenience, an [exported ExecuTorch bf16 model](https://huggingface.co/exec ``` # SpinQuant # Set these paths to point to the exported files -LLAMA_QUANTIZED_CHECKPOINT=path/to/spinquant/checkpoint.pth +LLAMA_QUANTIZED_CHECKPOINT=path/to/spinquant/consolidated.00.pth LLAMA_PARAMS=path/to/spinquant/params.json python -m examples.models.llama.export_llama \ @@ -215,7 +215,7 @@ For convenience, an [exported ExecuTorch SpinQuant model](https://huggingface.co ``` # QAT+LoRA # Set these paths to point to the exported files -LLAMA_QUANTIZED_CHECKPOINT=path/to/qlora/checkpoint.pth +LLAMA_QUANTIZED_CHECKPOINT=path/to/qlora/consolidated.00.pth LLAMA_PARAMS=path/to/qlora/params.json python -m examples.models.llama.export_llama \ @@ -248,7 +248,7 @@ You can export and run the original Llama 3 8B instruct model. 2. Export model and generate `.pte` file ``` python -m examples.models.llama.export_llama \ - --checkpoint \ + --checkpoint \ -p \ -kv \ --use_sdpa_with_kv_cache \ @@ -396,7 +396,7 @@ First export your model for lowbit quantization (step 2 above): ``` # Set these paths to point to the downloaded files -LLAMA_CHECKPOINT=path/to/checkpoint.pth +LLAMA_CHECKPOINT=path/to/consolidated.00.pth LLAMA_PARAMS=path/to/params.json # Set low-bit quantization parameters @@ -476,7 +476,7 @@ We use [LM Eval](https://github.com/EleutherAI/lm-evaluation-harness) to evaluat For base models, use the following example command to calculate its perplexity based on WikiText. 
``` python -m examples.models.llama.eval_llama \ - -c \ + -c \ -p \ -t \ -kv \ @@ -489,7 +489,7 @@ python -m examples.models.llama.eval_llama \ For instruct models, use the following example command to calculate its MMLU score. ``` python -m examples.models.llama.eval_llama \ - -c \ + -c \ -p \ -t \ -kv \ @@ -528,7 +528,7 @@ This example tries to reuse the Python code, with minimal modifications to make git clean -xfd pip uninstall executorch ./install_executorch.sh --clean -./install_executorch.sh --pybind xnnpack +./install_executorch.sh ``` - If you encounter `pthread` related issues during link time, add `pthread` in `target_link_libraries` in `CMakeLists.txt` - On Mac, if there is linking error in Step 4 with error message like diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py index d2e2d5396d3..19fef857865 100644 --- a/examples/models/llama/source_transformation/quantize.py +++ b/examples/models/llama/source_transformation/quantize.py @@ -8,7 +8,7 @@ import re from functools import partial from pathlib import Path -from typing import Any, Dict, Optional +from typing import Dict, Optional import torch import torch.nn as nn @@ -16,8 +16,6 @@ from executorch.extension.llm.export.builder import DType -from sentencepiece import SentencePieceProcessor - try: from fairseq2.nn.embedding import ( @@ -57,7 +55,7 @@ def quantize( # noqa C901 Args: model: The model to quantize. - qmode: The quantization mode, e.g. int8, 8da4w, 8da4w-gptq. + qmode: The quantization mode, e.g. int8, 8da4w. computation_dtype: The dtype that ops are performed in (the resulting dtype of dequantization). Also the dtype of the rest of the non-quantized compoents of the model. 
checkpoint_dtype: The dtype of the checkpoint, this arg exists since it is more accurate to @@ -161,58 +159,6 @@ def quantize( # noqa C901 if verbose: print("quantized model:", model) return model - elif qmode == "8da4w-gptq": - # Check for required args - required_args: Optional[Any] = [ - group_size, - calibration_limit, - calibration_seq_length, - ] - if any(arg is None for arg in required_args): - raise Exception( - "For 8da4w-gptq quantization, group size, calibration limit and calibration sequence length must be specified." - ) - if calibration_tasks is None: - calibration_tasks = ["wikitext"] - - try: - # torchao 0.3+ - from torchao._models._eval import InputRecorder - except ImportError: - from torchao.quantization.GPTQ import InputRecorder # pyre-ignore - - from torchao.quantization.quant_api import Int8DynActInt4WeightGPTQQuantizer - - if tokenizer_path is None: - assert checkpoint_path is not None, "checkpoint_path must be specified" - tokenizer_path = checkpoint_path.parent / "tokenizer.model" - assert tokenizer_path.is_file(), tokenizer_path - tokenizer = SentencePieceProcessor( # pyre-ignore[28] - model_file=str(tokenizer_path) - ) - - inputs = ( - InputRecorder( # pyre-fixme[16] - tokenizer, - calibration_seq_length, - None, # input_prep_func - pad_calibration_inputs, - model.vocab_size, - ) - .record_inputs( - calibration_tasks, - calibration_limit, - ) - .get_inputs() - ) - - gptq_quantizer = Int8DynActInt4WeightGPTQQuantizer( - blocksize, - percdamp, - group_size, - ) # TODO: separate computation and checkpoint dtype for GPTQ. 
- model = gptq_quantizer.quantize(model, inputs) - return model elif qmode == "vulkan_4w": from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer diff --git a/examples/models/phi-3-mini/README.md b/examples/models/phi-3-mini/README.md index f52f2a3a06d..3546ce7f1f2 100644 --- a/examples/models/phi-3-mini/README.md +++ b/examples/models/phi-3-mini/README.md @@ -3,7 +3,7 @@ This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/micro # Instructions ## Step 1: Setup -1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh --pybind xnnpack` +1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh` 2. Currently, we support transformers v4.44.2. Install transformers with the following command: ``` pip uninstall -y transformers ; pip install transformers==4.44.2 diff --git a/examples/models/qwen3/README.md b/examples/models/qwen3/README.md index 65923fb020c..a589d27c19d 100644 --- a/examples/models/qwen3/README.md +++ b/examples/models/qwen3/README.md @@ -88,4 +88,4 @@ cmake-out/examples/models/llama/llama_main To run the model on an example iOS or Android app, see the Llama README's [Step 5: Build Mobile apps](../llama/README.md#step-5-build-mobile-apps) section. ### FAQ -For more help with exporting or running this model, feel free to ask in our [discord channel](https://lnkd.in/gWCM4ViK). +For more help with exporting or running this model, feel free to ask in our [discord channel](https://discord.gg/UEjkY9Zs). diff --git a/examples/qualcomm/oss_scripts/deit.py b/examples/qualcomm/oss_scripts/deit.py new file mode 100644 index 00000000000..5482a77a166 --- /dev/null +++ b/examples/qualcomm/oss_scripts/deit.py @@ -0,0 +1,148 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. 
+# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import getpass +import json +import os +from multiprocessing.connection import Client + +import numpy as np +from executorch.backends.qualcomm._passes.qnn_pass_manager import ( + get_capture_program_passes, +) +from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype +from executorch.examples.qualcomm.utils import ( + build_executorch_binary, + get_imagenet_dataset, + make_output_dir, + parse_skip_delegation_node, + setup_common_args_and_variables, + SimpleADB, + topk_accuracy, +) +from transformers import AutoConfig, AutoModelForImageClassification + + +def get_instance(): + module = ( + AutoModelForImageClassification.from_pretrained( + "facebook/deit-base-distilled-patch16-224" + ) + .eval() + .to("cpu") + ) + + return module + + +def main(args): + skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args) + + os.makedirs(args.artifact, exist_ok=True) + config = AutoConfig.from_pretrained("facebook/deit-base-distilled-patch16-224") + data_num = 100 + height = config.image_size + width = config.image_size + inputs, targets, input_list = get_imagenet_dataset( + dataset_path=f"{args.dataset}", + data_size=data_num, + image_shape=(height, width), + crop_size=(height, width), + ) + + # Get the Deit model. 
+ model = get_instance() + pte_filename = "deit_qnn" + + # lower to QNN + passes_job = get_capture_program_passes() + build_executorch_binary( + model, + inputs[0], + args.model, + f"{args.artifact}/{pte_filename}", + dataset=inputs, + skip_node_id_set=skip_node_id_set, + skip_node_op_set=skip_node_op_set, + quant_dtype=QuantDtype.use_8a8w, + passes_job=passes_job, + shared_buffer=args.shared_buffer, + ) + + if args.compile_only: + return + + workspace = f"/data/local/tmp/{getpass.getuser()}/executorch/{pte_filename}" + pte_path = f"{args.artifact}/{pte_filename}.pte" + + adb = SimpleADB( + qnn_sdk=os.getenv("QNN_SDK_ROOT"), + build_path=f"{args.build_folder}", + pte_path=pte_path, + workspace=workspace, + device_id=args.device, + host_id=args.host, + soc_model=args.model, + ) + adb.push(inputs=inputs, input_list=input_list) + adb.execute() + + # collect output data + output_data_folder = f"{args.artifact}/outputs" + make_output_dir(output_data_folder) + + adb.pull(output_path=args.artifact) + + # top-k analysis + predictions = [] + for i in range(data_num): + predictions.append( + np.fromfile( + os.path.join(output_data_folder, f"output_{i}_0.raw"), dtype=np.float32 + ) + ) + + k_val = [1, 5] + topk = [topk_accuracy(predictions, targets, k).item() for k in k_val] + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send(json.dumps({f"top_{k}": topk[i] for i, k in enumerate(k_val)})) + else: + for i, k in enumerate(k_val): + print(f"top_{k}->{topk[i]}%") + + +if __name__ == "__main__": + parser = setup_common_args_and_variables() + parser.add_argument( + "-a", + "--artifact", + help="path for storing generated artifacts and output by this example. Default ./deit_qnn", + default="./deit_qnn", + type=str, + ) + + parser.add_argument( + "-d", + "--dataset", + help=( + "path to the validation folder of ImageNet dataset. " + "e.g. 
--dataset imagenet-mini/val " + "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)" + ), + type=str, + required=True, + ) + + args = parser.parse_args() + try: + main(args) + except Exception as e: + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send(json.dumps({"Error": str(e)})) + else: + raise Exception(e) diff --git a/examples/qualcomm/oss_scripts/efficientnet.py b/examples/qualcomm/oss_scripts/efficientnet.py new file mode 100644 index 00000000000..b11ad7abc47 --- /dev/null +++ b/examples/qualcomm/oss_scripts/efficientnet.py @@ -0,0 +1,145 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import json +import logging +import os +from multiprocessing.connection import Client + +import numpy as np + +import torch +from executorch.backends.qualcomm.quantizer.quantizer import QuantDtype +from executorch.examples.qualcomm.utils import ( + build_executorch_binary, + get_imagenet_dataset, + make_output_dir, + parse_skip_delegation_node, + setup_common_args_and_variables, + SimpleADB, + topk_accuracy, +) +from transformers import AutoModelForImageClassification + + +def main(args): + skip_node_id_set, skip_node_op_set = parse_skip_delegation_node(args) + + # ensure the working directory exist. + os.makedirs(args.artifact, exist_ok=True) + + if not args.compile_only and args.device is None: + raise RuntimeError( + "device serial is required if not compile only. " + "Please specify a device serial by -s/--device argument." + ) + + data_num = 100 + if args.ci: + inputs = [(torch.rand(1, 3, 224, 224),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." 
+ ) + else: + inputs, targets, input_list = get_imagenet_dataset( + dataset_path=f"{args.dataset}", + data_size=data_num, + image_shape=(256, 256), + crop_size=224, + ) + + module = ( + AutoModelForImageClassification.from_pretrained("google/efficientnet-b0") + .eval() + .to("cpu") + ) + pte_filename = "efficientnet_qnn_q16" + build_executorch_binary( + module.eval(), + inputs[0], + args.model, + f"{args.artifact}/{pte_filename}", + inputs, + skip_node_id_set=skip_node_id_set, + skip_node_op_set=skip_node_op_set, + quant_dtype=QuantDtype.use_16a16w, + shared_buffer=args.shared_buffer, + ) + + if args.compile_only: + return + + adb = SimpleADB( + qnn_sdk=os.getenv("QNN_SDK_ROOT"), + build_path=f"{args.build_folder}", + pte_path=f"{args.artifact}/{pte_filename}.pte", + workspace=f"/data/local/tmp/executorch/{pte_filename}", + device_id=args.device, + host_id=args.host, + soc_model=args.model, + shared_buffer=args.shared_buffer, + ) + adb.push(inputs=inputs, input_list=input_list) + adb.execute() + + # collect output data + output_data_folder = f"{args.artifact}/outputs" + make_output_dir(output_data_folder) + + adb.pull(output_path=args.artifact) + + # top-k analysis + predictions = [] + for i in range(data_num): + predictions.append( + np.fromfile( + os.path.join(output_data_folder, f"output_{i}_0.raw"), dtype=np.float32 + ) + ) + + k_val = [1, 5] + topk = [topk_accuracy(predictions, targets, k).item() for k in k_val] + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send(json.dumps({f"top_{k}": topk[i] for i, k in enumerate(k_val)})) + else: + for i, k in enumerate(k_val): + print(f"top_{k}->{topk[i]}%") + + +if __name__ == "__main__": + parser = setup_common_args_and_variables() + + parser.add_argument( + "-d", + "--dataset", + help=( + "path to the validation folder of ImageNet dataset. " + "e.g. 
--dataset imagenet-mini/val " + "for https://www.kaggle.com/datasets/ifigotin/imagenetmini-1000)" + ), + type=str, + required=False, + ) + + parser.add_argument( + "-a", + "--artifact", + help="path for storing generated artifacts by this example. " + "Default ./efficientnet", + default="./efficientnet", + type=str, + ) + + args = parser.parse_args() + try: + main(args) + except Exception as e: + if args.ip and args.port != -1: + with Client((args.ip, args.port)) as conn: + conn.send(json.dumps({"Error": str(e)})) + else: + raise Exception(e) diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index d348878294a..bdc2019352e 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -152,8 +152,10 @@ Error Runner::load() { // Use attention mask length to retrieve AR length and context length // Cache len equals to context_len - ar_len - int32_t prompt_processor_ar_len, token_generator_ar_len, max_cache_len, - max_ar_len; + int32_t prompt_processor_ar_len = 0; + int32_t token_generator_ar_len = 0; + int32_t max_cache_len = 0; + int32_t max_ar_len = 0; // atten mask: [1, AR-N, CL] auto atten_mask_meta_token = method_meta->input_tensor_meta(1); token_generator_ar_len = atten_mask_meta_token->sizes()[1]; diff --git a/examples/xnnpack/README.md b/examples/xnnpack/README.md index 5c307d34717..6fe1f0488b2 100644 --- a/examples/xnnpack/README.md +++ b/examples/xnnpack/README.md @@ -24,7 +24,7 @@ The following command will produce a floating-point XNNPACK delegated model `mv2 python3 -m examples.xnnpack.aot_compiler --model_name="mv2" --delegate ``` -Once we have the model binary (pte) file, then let's run it with ExecuTorch runtime using the `xnn_executor_runner`. 
With cmake, you first configure your cmake with the following: +Once we have the model binary (pte) file, then let's run it with ExecuTorch runtime using the `executor_runner`. With cmake, you first configure your cmake with the following: ```bash # cd to the root of executorch repo @@ -56,7 +56,7 @@ cmake --build cmake-out -j9 --target install --config Release Now finally you should be able to run this model with the following command ```bash -./cmake-out/backends/xnnpack/xnn_executor_runner --model_path ./mv2_xnnpack_fp32.pte +./cmake-out/executor_runner --model_path ./mv2_xnnpack_fp32.pte ``` ## Quantization @@ -80,7 +80,7 @@ python3 -m examples.xnnpack.quantization.example --help ``` ## Running the XNNPACK Model with CMake -After exporting the XNNPACK Delegated model, we can now try running it with example inputs using CMake. We can build and use the xnn_executor_runner, which is a sample wrapper for the ExecuTorch Runtime and XNNPACK Backend. We first begin by configuring the CMake build like such: +After exporting the XNNPACK Delegated model, we can now try running it with example inputs using CMake. We can build and use the executor_runner, which is a sample wrapper for the ExecuTorch Runtime. The XNNPACK Backend is enabled via the compilation flag `-DEXECUTORCH_BUILD_XNNPACK=ON`. 
We first begin by configuring the CMake build like such: ```bash # cd to the root of executorch repo cd executorch @@ -107,9 +107,9 @@ Then you can build the runtime componenets with cmake --build cmake-out -j9 --target install --config Release ``` -Now you should be able to find the executable built at `./cmake-out/backends/xnnpack/xnn_executor_runner` you can run the executable with the model you generated as such +Now you should be able to find the executable built at `./cmake-out/executor_runner` you can run the executable with the model you generated as such ```bash -./cmake-out/backends/xnnpack/xnn_executor_runner --model_path=./mv2_quantized.pte +./cmake-out/executor_runner --model_path=./mv2_quantized.pte ``` ## Delegating a Quantized Model diff --git a/exir/passes/constant_prop_pass.py b/exir/passes/constant_prop_pass.py index a103568b9a9..fc93aa1b0ca 100644 --- a/exir/passes/constant_prop_pass.py +++ b/exir/passes/constant_prop_pass.py @@ -295,6 +295,37 @@ def create_constant_nodes_and_return_specs( return name_to_spec_dict +def _update_output_node_and_specs(exported_program: ExportedProgram) -> None: + """ + Update the output node and output specs in the exported program. + In case a constant node is used as output, we replace it with a clone of the constant node. 
+ """ + # Dict [node.name -> InputSpec] + updated_constant_placeholders = get_constant_placeholder_dict(exported_program) + output = exported_program.graph.find_nodes(op="output")[0] + output_nodes = cast(list[torch.fx.Node], list(output.args[0])) + output_specs = exported_program.graph_signature.output_specs + assert len(output_nodes) == len(output_specs) + + for i in range(len(output_specs)): + out_node = output_nodes[i] + if out_node not in updated_constant_placeholders: + continue + + with exported_program.graph.inserting_after(out_node): + new_node = exported_program.graph.call_function( + exir_ops.edge.aten.clone.default, (out_node,) + ) + assert "val" in out_node.meta + new_node.meta["val"] = out_node.meta["val"] + output_nodes[i] = new_node + + # Update the constant-propagated output node. + output_specs[i].arg = TensorArgument(name=output_nodes[i].name) + + output.args = (output_nodes,) + + def constant_prop_pass( exported_program: ExportedProgram, custom_skip_targets: Optional[set[EdgeOpOverload]] = None, @@ -341,12 +372,12 @@ def constant_prop_pass( # Generate new input spec. new_input_specs = [] - for node in exported_program.graph.nodes: - if node.op != "placeholder": - continue + for node in exported_program.graph.find_nodes(op="placeholder"): new_input_specs.append(name_to_spec_dict[node.name]) exported_program.graph_signature.input_specs = new_input_specs + _update_output_node_and_specs(exported_program) + # Cleanup the graph. 
exported_program.graph.eliminate_dead_code() exported_program.graph_module.recompile() diff --git a/exir/tests/test_passes.py b/exir/tests/test_passes.py index da072b0f838..ca2b5ebdc35 100644 --- a/exir/tests/test_passes.py +++ b/exir/tests/test_passes.py @@ -1026,6 +1026,34 @@ def forward(self, x): "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor" ).run(gm.code) + def test_constant_prop_for_output(self) -> None: + class Add(torch.nn.Module): + def forward(self) -> torch.Tensor: + return torch.add(torch.tensor(3), torch.tensor(5)) + + add = Add() + + edge = to_edge( + export(add, (), strict=True), + compile_config=EdgeCompileConfig(_skip_dim_order=False), + ) + # Check there is a lifted tensor followed by a to_copy node + FileCheck().check("c_lifted_tensor_0").check("c_lifted_tensor_1").run( + edge.exported_program().graph_module.code + ) + + edge._edge_programs["forward"] = constant_prop_pass( + edge.exported_program("forward") + ) + + # Check (c_lifted_tensor_*) nodes are all replaced by _prop_tensor_constant. 
+ FileCheck().check_not("c_lifted_tensor_").check("_prop_tensor_constant").run( + edge.exported_program().graph_module.code + ) + # Validate that the program successfully passes validation to executorch: + edge.exported_program()._validate() + edge.to_executorch() + def test_constant_prop_pass_for_add(self) -> None: class Add(torch.nn.Module): def forward(self, x: torch.Tensor) -> torch.Tensor: diff --git a/export/TARGETS b/export/TARGETS index ae8be8a5e98..bf1002a701e 100644 --- a/export/TARGETS +++ b/export/TARGETS @@ -12,6 +12,7 @@ python_library( "//executorch/exir/backend:backend_api", "//executorch/exir:pass_manager", "//executorch/devtools/backend_debug:delegation_info", + "//executorch/extension/export_util:export_util", ] ) diff --git a/export/export.py b/export/export.py index 593f9b91157..7dd6b239d0a 100644 --- a/export/export.py +++ b/export/export.py @@ -4,16 +4,19 @@ import torch from executorch.devtools.backend_debug import get_delegation_info from executorch.exir._warnings import experimental +from executorch.exir.backend.backend_api import validation_disabled from executorch.exir.program import ( EdgeProgramManager, ExecutorchProgramManager, to_edge_transform_and_lower, ) from executorch.exir.schema import Program +from executorch.extension.export_util.utils import save_pte_program from executorch.runtime import Runtime, Verification from tabulate import tabulate from torch import nn from torch.ao.quantization import allow_exported_model_train_eval +from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer from torch.export import ExportedProgram from torchao.quantization import quantize_ from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e @@ -145,15 +148,15 @@ def run( model, self._example_inputs_dict[method_name][0], dynamic_shapes=dynamic_shapes, + strict=True, ) # Apply pre-edge transform passes if available if self._pre_edge_transform_passes is not None: - 
self._exported_program[method_name] = ( - self._pre_edge_transform_passes( + for pre_edge_transform_pass in self._pre_edge_transform_passes: + self._exported_program[method_name] = pre_edge_transform_pass( self._exported_program[method_name] ) - ) def get_artifacts(self) -> Dict[str, ExportedProgram]: """ @@ -210,13 +213,14 @@ def run( self._constant_methods = transform_config.get("constant_methods", None) # Process inputs - self._edge_program_manager = to_edge_transform_and_lower( - self._exported_program, - partitioner=self._partitioners, - transform_passes=self._transform_passes, - constant_methods=self._constant_methods, - compile_config=self._compile_config, - ) + with validation_disabled(): + self._edge_program_manager = to_edge_transform_and_lower( + self._exported_program, + partitioner=self._partitioners, + transform_passes=self._transform_passes, + constant_methods=self._constant_methods, + compile_config=self._compile_config, + ) self._delegation_info = get_delegation_info( self._edge_program_manager.exported_program().graph_module ) @@ -345,8 +349,8 @@ class QuantizeStage(Stage): Optional stage: Perform post-training quantization on the model. 
""" - def __init__(self, quantizer: Any) -> None: - self._quantizer = quantizer + def __init__(self, quantizers: Any) -> None: + self._quantizers = quantizers self._quantized_models: Dict[str, nn.Module] = {} self._model_dict: Dict[str, nn.Module] = {} self._exported_program_dict: Dict[str, ExportedProgram] = {} @@ -394,7 +398,8 @@ def run( model = exported_program.module() # Prepare the model for quantization - prepared_model = prepare_pt2e(model, self._quantizer) # type: ignore + composed_quantizer = ComposableQuantizer(self._quantizers) + prepared_model = prepare_pt2e(model, composed_quantizer) # type: ignore # Allow the model to switch between train and eval modes allow_exported_model_train_eval(prepared_model) @@ -546,9 +551,9 @@ def __init__( # Create the quantize stage if a quantizer is provided if self._export_recipe.quantization_recipe is not None: - quantizer = self._export_recipe.quantization_recipe.get_quantizer() - if quantizer is not None: - quantize_stage = QuantizeStage(quantizer=quantizer) + quantizers = self._export_recipe.quantization_recipe.get_quantizers() + if quantizers is not None: + quantize_stage = QuantizeStage(quantizers=quantizers) self._pipeline.append(quantize_stage) # Create the edge transform and lower stage @@ -661,6 +666,22 @@ def get_executorch_program(self) -> Program: ) return self._executorch_program_manager.executorch_program + def get_executorch_program_manager(self) -> ExecutorchProgramManager: + """ + Get the ExecutorchProgramManager. + + Returns: + The ExecutorchProgramManager + + Raises: + RuntimeError: If the executorch program manager is not initialized + """ + if self._executorch_program_manager is None: + raise RuntimeError( + "Executorch program manager is not initialized. Run export() first." + ) + return self._executorch_program_manager + def get_pte_buffer(self) -> bytes: """ Get the PTE buffer as bytes. 
@@ -677,6 +698,20 @@ def get_pte_buffer(self) -> bytes: ) return self._executorch_program_manager.buffer + def save_to_pte(self, output_name: str) -> None: + """ + Save the model to a .pte file. + + Args: + output_name (Optional[str]): The name of the .pte file. + """ + assert output_name, "Need a valid output name" + if self._executorch_program_manager is None: + raise RuntimeError( + "Executorch program manager is not initialized. Run export() first." + ) + save_pte_program(self._executorch_program_manager, output_name) + def get_example_input( self, method_name: str = "forward" ) -> Tuple[torch.Tensor, ...]: diff --git a/export/recipe.py b/export/recipe.py index 7b743c0aa4c..b993fce26e3 100644 --- a/export/recipe.py +++ b/export/recipe.py @@ -49,17 +49,17 @@ class QuantizationRecipe: quantizer: Optional quantizer for model quantization """ - quantizer: Optional[Quantizer] = None + quantizers: Optional[List[Quantizer]] = None ao_base_config: Optional[List[AOBaseConfig]] = None - def get_quantizer(self) -> Optional[Quantizer]: + def get_quantizers(self) -> Optional[Quantizer]: """ Get the quantizer associated with this recipe. 
Returns: The quantizer if one is set, otherwise None """ - return self.quantizer + return self.quantizers @experimental( @@ -94,10 +94,11 @@ class ExportRecipe: ) pre_edge_transform_passes: Optional[ Callable[[ExportedProgram], ExportedProgram] + | List[Callable[[ExportedProgram], ExportedProgram]] ] = None edge_transform_passes: Optional[Sequence[PassType]] = None transform_check_ir_validity: bool = True - partitioners: Optional[list[Partitioner]] = None + partitioners: Optional[List[Partitioner]] = None executorch_backend_config: Optional[ExecutorchBackendConfig] = ( None # pyre-ignore[11]: Type not defined ) diff --git a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.kt b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.kt index 43ce302a7a6..2df45f14985 100644 --- a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.kt +++ b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.kt @@ -18,10 +18,14 @@ import org.apache.commons.io.FileUtils import org.json.JSONException import org.json.JSONObject import org.junit.Assert +import org.junit.Assert.assertEquals +import org.junit.Assert.assertThat +import org.junit.Assert.assertTrue import org.junit.Before import org.junit.Rule import org.junit.Test import org.junit.runner.RunWith +import org.pytorch.executorch.TestFileUtils.getTestFilePath import org.pytorch.executorch.extension.llm.LlmCallback import org.pytorch.executorch.extension.llm.LlmModule @@ -30,7 +34,7 @@ import org.pytorch.executorch.extension.llm.LlmModule class LlmModuleInstrumentationTest : LlmCallback { private val results: MutableList = ArrayList() private val tokensPerSecond: MutableList = ArrayList() - private var llmModule: LlmModule? 
= null + private lateinit var llmModule: LlmModule @Before @Throws(IOException::class) @@ -57,25 +61,25 @@ class LlmModuleInstrumentationTest : LlmCallback { @Test @Throws(IOException::class, URISyntaxException::class) fun testGenerate() { - val loadResult = llmModule!!.load() + val loadResult = llmModule.load() // Check that the model can be load successfully - Assert.assertEquals(OK.toLong(), loadResult.toLong()) + assertEquals(OK.toLong(), loadResult.toLong()) - llmModule!!.generate(TEST_PROMPT, SEQ_LEN, this@LlmModuleInstrumentationTest) - Assert.assertEquals(results.size.toLong(), SEQ_LEN.toLong()) - Assert.assertTrue(tokensPerSecond[tokensPerSecond.size - 1] > 0) + llmModule.generate(TEST_PROMPT, SEQ_LEN, this@LlmModuleInstrumentationTest) + assertEquals(results.size.toLong(), SEQ_LEN.toLong()) + assertTrue(tokensPerSecond[tokensPerSecond.size - 1] > 0) } @Test @Throws(IOException::class, URISyntaxException::class) fun testGenerateAndStop() { - llmModule!!.generate( + llmModule.generate( TEST_PROMPT, SEQ_LEN, object : LlmCallback { override fun onResult(result: String) { this@LlmModuleInstrumentationTest.onResult(result) - llmModule!!.stop() + llmModule.stop() } override fun onStats(stats: String) { @@ -85,7 +89,7 @@ class LlmModuleInstrumentationTest : LlmCallback { ) val stoppedResultSize = results.size - Assert.assertTrue(stoppedResultSize < SEQ_LEN) + assertTrue(stoppedResultSize < SEQ_LEN) } override fun onResult(result: String) { @@ -101,7 +105,8 @@ class LlmModuleInstrumentationTest : LlmCallback { val promptEvalEndMs = jsonObject.getInt("prompt_eval_end_ms") tps = numGeneratedTokens.toFloat() / (inferenceEndMs - promptEvalEndMs) * 1000 tokensPerSecond.add(tps) - } catch (_: JSONException) {} + } catch (_: JSONException) { + } } companion object { @@ -110,12 +115,5 @@ class LlmModuleInstrumentationTest : LlmCallback { private const val TEST_PROMPT = "Hello" private const val OK = 0x00 private const val SEQ_LEN = 32 - - private fun 
getTestFilePath(fileName: String): String { - return InstrumentationRegistry.getInstrumentation() - .targetContext - .externalCacheDir - .toString() + fileName - } } } diff --git a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleE2ETest.kt b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleE2ETest.kt index 2a1e9d4c8ff..e269f4aa38f 100644 --- a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleE2ETest.kt +++ b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleE2ETest.kt @@ -22,6 +22,7 @@ import org.junit.Rule import org.junit.Test import org.junit.runner.RunWith import org.pytorch.executorch.TensorImageUtils.bitmapToFloat32Tensor +import org.pytorch.executorch.TestFileUtils.getTestFilePath /** Unit tests for [Module]. */ @RunWith(AndroidJUnit4::class) @@ -90,12 +91,6 @@ class ModuleE2ETest { } companion object { - private fun getTestFilePath(fileName: String): String { - return InstrumentationRegistry.getInstrumentation() - .targetContext - .externalCacheDir - .toString() + fileName - } fun argmax(array: FloatArray): Int { require(array.isNotEmpty()) { "Array cannot be empty" } diff --git a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt index 1885660d0a1..58e9cc8bfef 100644 --- a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt +++ b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/ModuleInstrumentationTest.kt @@ -23,6 +23,7 @@ import org.junit.Before import org.junit.Rule import org.junit.Test import org.junit.runner.RunWith +import org.pytorch.executorch.TestFileUtils.getTestFilePath /** Unit tests for [Module]. 
*/ @RunWith(AndroidJUnit4::class) @@ -173,12 +174,5 @@ class ModuleInstrumentationTest { private const val INVALID_STATE = 0x2 private const val INVALID_ARGUMENT = 0x12 private const val ACCESS_FAILED = 0x22 - - private fun getTestFilePath(fileName: String): String { - return InstrumentationRegistry.getInstrumentation() - .targetContext - .externalCacheDir - .toString() + fileName - } } } diff --git a/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/TestFileUtils.kt b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/TestFileUtils.kt new file mode 100644 index 00000000000..efa364f8e94 --- /dev/null +++ b/extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/TestFileUtils.kt @@ -0,0 +1,16 @@ +package org.pytorch.executorch + +import androidx.test.InstrumentationRegistry + +/** + * Test File Utils + */ +object TestFileUtils { + + fun getTestFilePath(fileName: String): String { + return InstrumentationRegistry.getInstrumentation() + .targetContext + .externalCacheDir + .toString() + fileName + } +} diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorch+Tensor.swift b/extension/apple/ExecuTorch/Exported/ExecuTorch+Tensor.swift index 29af8f78a5a..b325000ed23 100644 --- a/extension/apple/ExecuTorch/Exported/ExecuTorch+Tensor.swift +++ b/extension/apple/ExecuTorch/Exported/ExecuTorch+Tensor.swift @@ -54,7 +54,7 @@ public extension Tensor { func withUnsafeBytes(_ body: (UnsafeBufferPointer) throws -> R) throws -> R { guard dataType == T.dataType else { throw Error(code: .invalidArgument) } var result: Result? 
- bytes { pointer, count, _ in + __bytes { pointer, count, _ in result = Result { try body( UnsafeBufferPointer( start: pointer.assumingMemoryBound(to: T.self), @@ -74,7 +74,7 @@ public extension Tensor { func withUnsafeMutableBytes(_ body: (UnsafeMutableBufferPointer) throws -> R) throws -> R { guard dataType == T.dataType else { throw Error(code: .invalidArgument) } var result: Result? - mutableBytes { pointer, count, _ in + __mutableBytes { pointer, count, _ in result = Result { try body( UnsafeMutableBufferPointer( start: pointer.assumingMemoryBound(to: T.self), diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h b/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h index e832845d6ba..5b130da56c9 100644 --- a/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h +++ b/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h @@ -176,7 +176,7 @@ __attribute__((deprecated("This API is experimental."))) * - and the data type. */ - (void)bytesWithHandler:(NS_NOESCAPE void (^)(const void *pointer, NSInteger count, ExecuTorchDataType dataType))handler - NS_SWIFT_NAME(bytes(_:)); + NS_REFINED_FOR_SWIFT; /** * Executes a block with a pointer to the tensor's mutable byte data. @@ -187,7 +187,7 @@ __attribute__((deprecated("This API is experimental."))) * - and the data type. */ - (void)mutableBytesWithHandler:(NS_NOESCAPE void (^)(void *pointer, NSInteger count, ExecuTorchDataType dataType))handler - NS_SWIFT_NAME(mutableBytes(_:)); + NS_REFINED_FOR_SWIFT; /** * Resizes the tensor to a new shape. 
diff --git a/extension/apple/ExecuTorch/__tests__/TensorTest.swift b/extension/apple/ExecuTorch/__tests__/TensorTest.swift index 689a514403f..052b84ae5f8 100644 --- a/extension/apple/ExecuTorch/__tests__/TensorTest.swift +++ b/extension/apple/ExecuTorch/__tests__/TensorTest.swift @@ -68,13 +68,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.dimensionOrder, [0, 1]) XCTAssertEqual(tensor.shapeDynamism, .dynamicBound) XCTAssertEqual(tensor.count, 6) - - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(dataType, .float) - XCTAssertEqual(count, 6) - XCTAssertEqual(size(ofDataType: dataType), 4) - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitBytes() { @@ -91,13 +87,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.dimensionOrder, [0, 1]) XCTAssertEqual(tensor.shapeDynamism, .dynamicBound) XCTAssertEqual(tensor.count, 6) - - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(dataType, .double) - XCTAssertEqual(count, 6) - XCTAssertEqual(size(ofDataType: dataType), 8) - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Double.self), count: count)).map { $0 + 1 }, data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer).map { $0 + 1 }, data) + }) } func testInitData() { @@ -105,9 +97,9 @@ class TensorTest: XCTestCase { let data = Data(bytes: dataArray, count: dataArray.count * MemoryLayout.size) let tensor = Tensor(data: data, shape: [4], dataType: .float) XCTAssertEqual(tensor.count, 4) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count)), dataArray) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), dataArray) + }) } func 
testWithCustomStridesAndDimensionOrder() { @@ -123,10 +115,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1, 2]) XCTAssertEqual(tensor.dimensionOrder, [1, 0]) XCTAssertEqual(tensor.count, 4) - - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testMutableBytes() { @@ -134,41 +125,14 @@ class TensorTest: XCTestCase { let tensor = data.withUnsafeMutableBytes { Tensor(bytes: $0.baseAddress!, shape: [4], dataType: .int) } - tensor.mutableBytes { pointer, count, dataType in - XCTAssertEqual(dataType, .int) - let buffer = pointer.assumingMemoryBound(to: Int32.self) - for i in 0..) in + XCTAssertNoThrow(try tensor.withUnsafeMutableBytes { (buffer: UnsafeMutableBufferPointer) in for i in buffer.indices { buffer[i] *= 2 } - } - try tensor.withUnsafeBytes { buffer in + }) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in XCTAssertEqual(Array(buffer), [2, 4, 6, 8]) - } + }) } func testInitWithTensor() { @@ -202,18 +166,17 @@ class TensorTest: XCTestCase { func testResize() { var data: [Int] = [1, 2, 3, 4] let tensor = data.withUnsafeMutableBytes { - Tensor(bytesNoCopy: $0.baseAddress!, shape: [4, 1], dataType: .int) + Tensor(bytesNoCopy: $0.baseAddress!, shape: [4, 1], dataType: .long) } XCTAssertNoThrow(try tensor.resize(to: [2, 2])) - XCTAssertEqual(tensor.dataType, .int) + XCTAssertEqual(tensor.dataType, .long) XCTAssertEqual(tensor.shape, [2, 2]) XCTAssertEqual(tensor.strides, [2, 1]) XCTAssertEqual(tensor.dimensionOrder, [0, 1]) XCTAssertEqual(tensor.count, 4) - - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + 
XCTAssertEqual(Array(buffer), data) + }) } func testResizeError() { @@ -255,9 +218,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt8.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsInt8() { @@ -268,9 +231,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int8.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsInt16() { @@ -281,9 +244,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int16.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsInt32() { @@ -294,9 +257,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int32.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsInt64() { @@ -307,9 +270,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) 
XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int64.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsFloat() { @@ -320,9 +283,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsDouble() { @@ -333,9 +296,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Double.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsBool() { @@ -346,9 +309,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Bool.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsUInt16() { @@ -359,9 +322,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - 
XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt16.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsUInt32() { @@ -372,9 +335,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt32.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsUInt64() { @@ -385,9 +348,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt64.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsInt() { @@ -398,9 +361,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int.self), count: count)), data) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitScalarsUInt() { @@ -411,9 +374,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, [1]) XCTAssertEqual(tensor.dimensionOrder, [0]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(Array(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt.self), count: count)), data) - } + XCTAssertNoThrow(try 
tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer), data) + }) } func testInitInt8() { @@ -423,9 +386,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int8.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitInt16() { @@ -435,9 +398,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int16.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitInt32() { @@ -447,9 +410,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int32.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitInt64() { @@ -459,9 +422,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int64.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func 
testInitUInt8() { @@ -471,9 +434,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt8.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitUInt16() { @@ -483,9 +446,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt16.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitUInt32() { @@ -495,9 +458,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt32.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitUInt64() { @@ -507,9 +470,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt64.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitBool() { @@ -519,9 +482,9 @@ class TensorTest: XCTestCase { 
XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Bool.self), count: count).first, true) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, true) + }) } func testInitFloat() { @@ -531,9 +494,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count).first, 42.0) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitDouble() { @@ -543,9 +506,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Double.self), count: count).first, 42.0) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42.0) + }) } func testInitInt() { @@ -555,9 +518,9 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { buffer in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testInitUInt() { @@ -567,20 +530,20 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.strides, []) XCTAssertEqual(tensor.dimensionOrder, []) XCTAssertEqual(tensor.count, 1) - 
tensor.bytes { pointer, count, dataType in - XCTAssertEqual(UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: UInt.self), count: count).first, 42) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertEqual(Array(buffer).first, 42) + }) } func testEmpty() { let tensor = Tensor.empty(shape: [3, 4], dataType: .float) XCTAssertEqual(tensor.shape, [3, 4]) XCTAssertEqual(tensor.count, 12) - tensor.bytes { pointer, count, dataType in - XCTAssertNotNil(pointer) - XCTAssertEqual(count, 12) - XCTAssertEqual(dataType, .float) - } + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + XCTAssertNotNil(buffer.baseAddress) + XCTAssertEqual(buffer.count, 12) + XCTAssertEqual(tensor.dataType, .float) + }) } func testEmptyLike() { @@ -596,87 +559,76 @@ class TensorTest: XCTestCase { let tensor = Tensor.full(shape: [2, 2], scalar: 7, dataType: .int) XCTAssertEqual(tensor.shape, [2, 2]) XCTAssertEqual(tensor.count, 4) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(dataType, .int) - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int32.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertEqual(value, 7) } - } + }) } func testFullLike() { let other = Tensor.empty(shape: [2, 2], dataType: .int) let tensor = Tensor.full(like: other, scalar: 42, dataType: .float) XCTAssertEqual(tensor.shape, other.shape) - tensor.bytes { pointer, count, dataType in - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertEqual(value, 42.0) } - } + }) } func testOnes() { let tensor = Tensor.ones(shape: [2, 3], dataType: .float) XCTAssertEqual(tensor.shape, [2, 3]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - 
XCTAssertEqual(dataType, .float) - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertEqual(value, 1.0) } - } + }) } func testOnesLike() { let other = Tensor.empty(shape: [2, 4], dataType: .double) let tensor = Tensor.ones(like: other) XCTAssertEqual(tensor.shape, other.shape) - tensor.bytes { pointer, count, dataType in - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Double.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertEqual(value, 1.0) } - } + }) } func testZeros() { let tensor = Tensor.zeros(shape: [2, 3], dataType: .double) XCTAssertEqual(tensor.shape, [2, 3]) XCTAssertEqual(tensor.count, 6) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(dataType, .double) - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Double.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertEqual(value, 0) } - } + }) } func testZerosLike() { let other = Tensor.full(shape: [3, 2], scalar: 9, dataType: .int) let tensor = Tensor.zeros(like: other) XCTAssertEqual(tensor.shape, other.shape) - tensor.bytes { pointer, count, dataType in - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int32.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertEqual(value, 0) } - } + }) } func testRandom() { let tensor = Tensor.rand(shape: [3, 3], dataType: .float) XCTAssertEqual(tensor.shape, [3, 3]) XCTAssertEqual(tensor.count, 9) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(dataType, .float) - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Float.self), count: count) - let 
uniqueValues = Set(buffer.map { $0 }) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in + let uniqueValues = Set(buffer) XCTAssertTrue(uniqueValues.count > 1) - } + }) } func testRandomLike() { @@ -686,15 +638,13 @@ class TensorTest: XCTestCase { XCTAssertEqual(tensor.count, other.count) } - func testRandomNormal() { + func testRandomNormal() { let tensor = Tensor.randn(shape: [4], dataType: .double) XCTAssertEqual(tensor.shape, [4]) XCTAssertEqual(tensor.count, 4) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(dataType, .double) - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Double.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in XCTAssertEqual(buffer.count, 4) - } + }) } func testRandomNormalLike() { @@ -708,23 +658,20 @@ class TensorTest: XCTestCase { let tensor = Tensor.randint(low: 10, high: 20, shape: [5], dataType: .int) XCTAssertEqual(tensor.shape, [5]) XCTAssertEqual(tensor.count, 5) - tensor.bytes { pointer, count, dataType in - XCTAssertEqual(dataType, .int) - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int32.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertTrue(value >= 10 && value < 20) } - } + }) } func testRandomIntegerLike() { let other = Tensor.ones(shape: [5], dataType: .int) let tensor = Tensor.randint(like: other, low: 100, high: 200) - tensor.bytes { pointer, count, dataType in - let buffer = UnsafeBufferPointer(start: pointer.assumingMemoryBound(to: Int32.self), count: count) + XCTAssertNoThrow(try tensor.withUnsafeBytes { (buffer: UnsafeBufferPointer) in for value in buffer { XCTAssertTrue(value >= 100 && value < 200) } - } + }) } } diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj index 
355227eef63..b0cddfa808c 100644 --- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj +++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj @@ -392,7 +392,7 @@ ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"10.15\"\nfi\n\ncmake_build() {\n local src_dir=$1\n local target=$2\n shift 2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\" --target \"$target\"\n if [[ \"$target\" == \"install\" ]]; then\n cmake --install . 
--prefix \"$CMAKE_DIR\"\n fi\n}\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/pcre2\" \"install\" \\\n -DPCRE2_BUILD_PCRE2_8=ON \\\n -DPCRE2_BUILD_PCRE2_16=OFF \\\n -DPCRE2_BUILD_PCRE2_32=OFF \\\n -DPCRE2_BUILD_TESTS=OFF \\\n -DPCRE2_BUILD_PCRE2GREP=OFF \\\n -DPCRE2_BUILD_PCRE2TEST=OFF \\\n -DPCRE2_BUILD_PCRE2GPERF=OFF \\\n -DPCRE2_BUILD_DOCS=OFF \\\n -DPCRE2_BUILD_LIBPCRE2_PDB=OFF\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n -DSPM_ENABLE_SHARED=OFF\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/llama.cpp-unicode\" \"install\"\n \n# Include the single header for json.\nmkdir -p \"$CMAKE_DIR/include/nlohmann\"\ncp \"$SRCROOT/../../../llm/tokenizers/third-party/json/single_include/nlohmann/json.hpp\" \"$CMAKE_DIR/include/nlohmann/json.hpp\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n"; + shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. 
Install it to Applications/ folder and run sudo /Applications/CMake.app/Contents/bin/cmake-gui --install to install CMake commandline tools.\"\n exit 1\nfi\n\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"12.0\"\nfi\n\ncmake_build() {\n local src_dir=$1\n local target=$2\n shift 2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n\n mkdir -p \"$build_dir\" && cd \"$build_dir\"\n\n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n cmake -G Xcode \\\n -DCMAKE_BUILD_TYPE=\"Release\" \\\n -DCMAKE_CXX_STANDARD=17 \\\n -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../../../third-party/ios-cmake/ios.toolchain.cmake\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD=\"c++17\" \\\n -DCMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY=\"libc++\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n \"${extra_args[@]}\" \\\n \"$src_dir\"\n cmake --build . --config \"Release\" --target \"$target\"\n if [[ \"$target\" == \"install\" ]]; then\n cmake --install . 
--prefix \"$CMAKE_DIR\"\n fi\n}\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/abseil-cpp\" \"install\" \\\n -DABSL_PROPAGATE_CXX_STD=ON\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/re2\" \"install\"\n\ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/pcre2\" \"install\" \\\n -DPCRE2_BUILD_PCRE2_8=ON \\\n -DPCRE2_BUILD_PCRE2_16=OFF \\\n -DPCRE2_BUILD_PCRE2_32=OFF \\\n -DPCRE2_BUILD_TESTS=OFF \\\n -DPCRE2_BUILD_PCRE2GREP=OFF \\\n -DPCRE2_BUILD_PCRE2TEST=OFF \\\n -DPCRE2_BUILD_PCRE2GPERF=OFF \\\n -DPCRE2_BUILD_DOCS=OFF \\\n -DPCRE2_BUILD_LIBPCRE2_PDB=OFF\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/sentencepiece\" \"sentencepiece-static\" \\\n -DSPM_ENABLE_SHARED=OFF\n \ncmake_build \"$SRCROOT/../../../llm/tokenizers/third-party/llama.cpp-unicode\" \"install\"\n \n# Include the single header for json.\nmkdir -p \"$CMAKE_DIR/include/nlohmann\"\ncp \"$SRCROOT/../../../llm/tokenizers/third-party/json/single_include/nlohmann/json.hpp\" \"$CMAKE_DIR/include/nlohmann/json.hpp\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n"; }; /* End PBXShellScriptBuildPhase section */ @@ -632,7 +632,7 @@ DEVELOPMENT_TEAM = ""; GENERATE_INFOPLIST_FILE = YES; IPHONEOS_DEPLOYMENT_TARGET = 17.0; - MACOSX_DEPLOYMENT_TARGET = 10.15; + MACOSX_DEPLOYMENT_TARGET = 12.0; MARKETING_VERSION = 1.0; OTHER_CODE_SIGN_FLAGS = "--deep"; PRODUCT_BUNDLE_IDENTIFIER = org.pytorch.executorch.BenchmarkTests; @@ -659,7 +659,7 @@ DEVELOPMENT_TEAM = ""; GENERATE_INFOPLIST_FILE = YES; IPHONEOS_DEPLOYMENT_TARGET = 17.0; - MACOSX_DEPLOYMENT_TARGET = 10.15; + MACOSX_DEPLOYMENT_TARGET = 12.0; MARKETING_VERSION = 1.0; OTHER_CODE_SIGN_FLAGS = "--deep"; PRODUCT_BUNDLE_IDENTIFIER = org.pytorch.executorch.BenchmarkTests; diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index 57eb76d71d6..fc5962cd9e0 160000 --- a/extension/llm/tokenizers +++ 
b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit 57eb76d71d6dde5396520c7d35142eb868994e06 +Subproject commit fc5962cd9e08019c5df6667eba3377e7d76441f7 diff --git a/extension/pybindings/README.md b/extension/pybindings/README.md index 8675993264d..2cd680e7bb9 100644 --- a/extension/pybindings/README.md +++ b/extension/pybindings/README.md @@ -2,28 +2,18 @@ This Python module, named `portable_lib`, provides a set of functions and classes for loading and executing bundled programs. To install it, run the fullowing command: ```bash -CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON" pip install . --no-build-isolation -``` - -Or when installing the rest of dependencies: +./install_executorch.sh -```bash -install_executorch.sh --pybind +# ...or use pip directly +pip install . --no-build-isolation ``` # Link Backends -You can link the runtime against some backends to make sure a delegated or partitioned model can still run by Python module successfully: - -```bash -CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON" \ - pip install . --no-build-isolation -``` - -Similarly, when installing the rest of dependencies: +Not all backends are built into the pip wheel by default. You can link these missing/experimental backends by turning on the corresponding cmake flag. 
For example, to include the MPS backend: ```bash -install_executorch.sh --pybind xnnpack coreml mps +CMAKE_ARGS="-DEXECUTORCH_BUILD_MPS=ON" ./install_executorch.sh ``` ## Functions diff --git a/extension/threadpool/cpuinfo_utils.cpp b/extension/threadpool/cpuinfo_utils.cpp index 21862fbd4aa..599527a885e 100644 --- a/extension/threadpool/cpuinfo_utils.cpp +++ b/extension/threadpool/cpuinfo_utils.cpp @@ -16,6 +16,10 @@ #include +#if defined(__APPLE__) && defined(__aarch64__) +#include +#endif + namespace executorch::extension::cpuinfo { // Ignore revisions (last digit (4 LSBs)) @@ -33,6 +37,11 @@ bool is_non_performant_core(const struct cpuinfo_uarch_info* uarch_info) { case cpuinfo_uarch_cortex_a53: case cpuinfo_uarch_cortex_a510: case cpuinfo_uarch_icestorm: + case cpuinfo_uarch_blizzard: + case cpuinfo_uarch_sawtooth: + case cpuinfo_uarch_coll_sawtooth: + case cpuinfo_uarch_tupai_sawtooth: + case cpuinfo_uarch_tahiti_sawtooth: return true; // This can be so many other cores. // Need to update this to better account for slow cores @@ -167,6 +176,23 @@ uint32_t get_num_performant_cores() { // In one plua 12 while it has 2 little cores, the topology // reported in /sys/devices/system/cpu/cpu* /topology/core_siblings_list // report wrong topology which results in wront configratuon +#if defined(__aarch64__) && defined(__APPLE__) + // Copied from ATen/ParallelCommon.cpp + // On Apple Silicon there are efficient and performance core + // Restrict parallel algorithms to performance cores by default + int32_t num_cores = -1; + size_t num_cores_len = sizeof(num_cores); + if (sysctlbyname( + "hw.perflevel0.physicalcpu", + &num_cores, + &num_cores_len, + nullptr, + 0) == 0) { + if (num_cores > 1) { + return static_cast(num_cores); + } + } +#endif return _get_num_performant_cores(); } } diff --git a/install_executorch.py b/install_executorch.py index 4c7b51ef239..b46c9808ba6 100644 --- a/install_executorch.py +++ b/install_executorch.py @@ -8,14 +8,12 @@ import argparse 
import glob -import itertools import logging import os import shutil import subprocess import sys from contextlib import contextmanager -from typing import List, Tuple from install_requirements import ( install_requirements, @@ -52,10 +50,6 @@ def clean(): print("Done cleaning build artifacts.") -# Please keep this insync with `ShouldBuild.pybindings` in setup.py. -VALID_PYBINDS = ["coreml", "mps", "xnnpack", "training", "openvino"] - - ################################################################################ # Git submodules ################################################################################ @@ -139,14 +133,9 @@ def check_folder(folder: str, file: str) -> bool: logger.info("All required submodules are present.") -def build_args_parser() -> argparse.ArgumentParser: - # Parse options. - parser = argparse.ArgumentParser() - parser.add_argument( - "--pybind", - action="append", - nargs="+", - help="one or more of coreml/mps/xnnpack, or off", +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Install executorch in your Python environment." ) parser.add_argument( "--clean", @@ -166,83 +155,34 @@ def build_args_parser() -> argparse.ArgumentParser: "picked up without rebuilding the wheel. Extension libraries will be " "installed inside the source tree.", ) - return parser - - -# Returns (wants_off, wanted_pybindings) -def _list_pybind_defines(args) -> Tuple[bool, List[str]]: - if args.pybind is None: - return False, [] - - # Flatten list of lists. 
- args.pybind = list(itertools.chain(*args.pybind)) - if "off" in args.pybind: - if len(args.pybind) != 1: - raise Exception(f"Cannot combine `off` with other pybinds: {args.pybind}") - return True, [] - - cmake_args = [] - for pybind_arg in args.pybind: - if pybind_arg not in VALID_PYBINDS: - raise Exception( - f"Unrecognized pybind argument {pybind_arg}; valid options are: {', '.join(VALID_PYBINDS)}" - ) - if pybind_arg == "training": - cmake_args.append("-DEXECUTORCH_BUILD_EXTENSION_TRAINING=ON") - else: - cmake_args.append(f"-DEXECUTORCH_BUILD_{pybind_arg.upper()}=ON") - - return False, cmake_args + return parser.parse_args() def main(args): if not python_is_compatible(): sys.exit(1) - parser = build_args_parser() - args = parser.parse_args() - - cmake_args = [os.getenv("CMAKE_ARGS", "")] - use_pytorch_nightly = True - - wants_pybindings_off, pybind_defines = _list_pybind_defines(args) - if wants_pybindings_off: - cmake_args.append("-DEXECUTORCH_BUILD_PYBIND=OFF") - else: - cmake_args += pybind_defines + args = _parse_args() if args.clean: clean() return - if args.use_pt_pinned_commit: - # This option is used in CI to make sure that PyTorch build from the pinned commit - # is used instead of nightly. CI jobs wouldn't be able to catch regression from the - # latest PT commit otherwise - use_pytorch_nightly = False - + cmake_args = [os.getenv("CMAKE_ARGS", "")] # Use ClangCL on Windows. # ClangCL is an alias to Clang that configures it to work in an MSVC-compatible # mode. Using it on Windows to avoid compiler compatibility issues for MSVC. if os.name == "nt": cmake_args.append("-T ClangCL") - - # - # Install executorch pip package. This also makes `flatc` available on the path. - # The --extra-index-url may be necessary if pyproject.toml has a dependency on a - # pre-release or nightly version of a torch package. 
- # - - # Set environment variables os.environ["CMAKE_ARGS"] = " ".join(cmake_args) - # Check if the required submodules are present and update them if not check_and_update_submodules() - - install_requirements(use_pytorch_nightly) - - # Run the pip install command - subprocess.run( + # This option is used in CI to make sure that PyTorch build from the pinned commit + # is used instead of nightly. CI jobs wouldn't be able to catch regression from the + # latest PT commit otherwise + install_requirements(use_pytorch_nightly=not args.use_pt_pinned_commit) + os.execvp( + sys.executable, [ sys.executable, "-m", @@ -257,14 +197,10 @@ def main(args): "--extra-index-url", TORCH_NIGHTLY_URL, ], - check=True, ) if __name__ == "__main__": # Before doing anything, cd to the directory containing this script. os.chdir(os.path.dirname(os.path.abspath(__file__))) - if not python_is_compatible(): - sys.exit(1) - main(sys.argv[1:]) diff --git a/install_requirements.py b/install_requirements.py index 38188d08300..f60020dbbbf 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -71,7 +71,7 @@ def python_is_compatible(): # # NOTE: If you're changing, make the corresponding change in .ci/docker/ci_commit_pins/pytorch.txt # by picking the hash from the same date in https://hud.pytorch.org/hud/pytorch/pytorch/nightly/ -NIGHTLY_VERSION = "dev20250602" +NIGHTLY_VERSION = "dev20250601" def install_requirements(use_pytorch_nightly): diff --git a/kernels/aten/functions.yaml b/kernels/aten/functions.yaml index 48a8d3bc8ee..77bf9cd573b 100644 --- a/kernels/aten/functions.yaml +++ b/kernels/aten/functions.yaml @@ -315,6 +315,10 @@ - op: prod.out +- op: rand.out + +- op: randn.out + - op: reciprocal.out - op: relu.out diff --git a/kernels/optimized/cpu/op_linear.cpp b/kernels/optimized/cpu/op_linear.cpp index 210000b384d..d81bfd8643f 100644 --- a/kernels/optimized/cpu/op_linear.cpp +++ b/kernels/optimized/cpu/op_linear.cpp @@ -6,17 +6,69 @@ * LICENSE file in the root directory of 
this source tree. */ +#include + +#include + #include +#include +#include #include #include -#include - namespace torch { namespace executor { namespace native { -using Tensor = executorch::aten::Tensor; +namespace { +using ::executorch::aten::Tensor; +using ::executorch::cpublas::gemm; +using ::executorch::cpublas::TransposeType; +using ::executorch::runtime::toString; +using ::executorch::vec::map; +using ::executorch::vec::Vectorized; + +// Use vector store to initialize with scalar bias. +template +void initialize_scalar( + const ssize_t out_numel, + const scalar_t init, + scalar_t* out) { + using Vec = Vectorized; + + // Initialize a vector with the scalar initial value. + Vec init_vec(init); + + ssize_t d = 0; + for (; d < out_numel - (out_numel % Vec::size()); d += Vec::size()) { + // Vector-length store. + init_vec.store(out + d); + } + if (out_numel - d > 0) { + // Sub-vector-length store. + init_vec.store(out + d, static_cast(out_numel - d)); + } +} + +// Use std::memcpy to initialize with vector bias. +template +void initialize_to_vector( + const ssize_t n, + const ssize_t m, + const scalar_t* bias, + scalar_t* out) { + // Output is a n x m x scalar_t, while bias is m x scalar_t. + const size_t row_size = static_cast(m) * sizeof(scalar_t); + for (const auto col : c10::irange(n)) { + std::memcpy( + // Point to Column `col` of the output tensor. 
+ out + col * m, + bias, + row_size); + } +} + +} // namespace Tensor& opt_linear_out( RuntimeContext& ctx, @@ -24,12 +76,6 @@ Tensor& opt_linear_out( const Tensor& mat2, const optional& bias, Tensor& out) { - ET_KERNEL_CHECK_MSG( - ctx, - !bias.has_value(), - InvalidArgument, - out, - "bias not supported yet in linear"); ET_KERNEL_CHECK(ctx, check_linear_args(in, mat2, out), InvalidArgument, out); size_t output_ndim = 0; @@ -46,28 +92,74 @@ Tensor& opt_linear_out( return out; } - int flattened_input_dim = 1; + ssize_t n = 1; for (int ii = 0; ii < in.dim() - 1; ++ii) { - flattened_input_dim *= in.sizes()[ii]; + n *= in.sizes()[ii]; } + const ssize_t k = in.sizes()[in.dim() - 1]; + const ssize_t m = mat2.size(0); + + if (bias.has_value()) { + ET_KERNEL_CHECK_MSG( + ctx, + // Bias and output dtype must match. + bias->dtype() == out.dtype(), + InvalidArgument, + out, + "Bias has wrong dtype! Expected bias dtype to be the same as out dtype %s" + " but got %s", + toString(bias->dtype()), + toString(out.dtype())); + + ET_KERNEL_CHECK_MSG( + ctx, + // Either no bias or bias is a 1D tensor of size m or 1. + bias->dim() == 1 && (bias->size(0) == m || bias->size(0) == 1), + InvalidArgument, + out, + "Bias has wrong dimensionality! Expected 1-D tensor of size %d or empty," + " but got %d-D tensor with %d elements", + static_cast(m), + static_cast(bias->dim()), + static_cast(bias->numel())); + } + ET_SWITCH_REAL_TYPES_AND2( - Half, BFloat16, in.scalar_type(), ctx, "mm.out", CTYPE, [&]() { - size_t n = flattened_input_dim; - size_t k = in.sizes()[in.dim() - 1]; - size_t m = mat2.size(0); - - executorch::cpublas::gemm( - executorch::cpublas::TransposeType::Transpose, - executorch::cpublas::TransposeType::NoTranspose, + Half, BFloat16, out.scalar_type(), ctx, "linear.out", CTYPE, [&] { + // Fill output with bias if it is provided. + if (bias.has_value() && bias->numel() == 1) { + // Scalar version of initialization. 
+ initialize_scalar( + out.numel(), + *bias->const_data_ptr(), + out.mutable_data_ptr()); + } else if (bias.has_value()) { + // Assume bias is a 1D tensor of size m. + initialize_to_vector( + n, + m, + bias->const_data_ptr(), + out.mutable_data_ptr()); + } + + // Set beta to 1 if bias was applied so that GEMM adds to the pre-filled + // bias, otherwise beta remains 0 (i.e. the output is fully overwritten + // by GEMM). + const CTYPE beta = + bias.has_value() ? static_cast(1) : static_cast(0); + + gemm( + /*transa=*/TransposeType::Transpose, + /*transb=*/TransposeType::NoTranspose, m, n, k, - static_cast(1), + /*alpha=*/static_cast(1), mat2.const_data_ptr(), k, in.const_data_ptr(), k, - static_cast(0), + beta, out.mutable_data_ptr(), m); }); diff --git a/kernels/portable/cpu/op_rand.cpp b/kernels/portable/cpu/op_rand.cpp new file mode 100644 index 00000000000..ba9b160019e --- /dev/null +++ b/kernels/portable/cpu/op_rand.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#include + +#include +#include + +#include + +namespace torch { +namespace executor { +namespace native { + +using executorch::aten::IntArrayRef; +using Tensor = executorch::aten::Tensor; +using ScalarType = executorch::aten::ScalarType; + +Tensor& +rand_out(KernelRuntimeContext& ctx, const IntArrayRef sizes, Tensor& out) { + (void)ctx; + + std::mt19937 gen((std::random_device())()); + std::uniform_real_distribution dist(0.0, 1.0); + + // Resize for dynamic shape + ET_KERNEL_CHECK_MSG( + ctx, + resize_tensor(out, sizes) == Error::Ok, + InvalidArgument, + out, + "Failed to resize output tensor."); + + ET_SWITCH_FLOATHBF16_TYPES(out.scalar_type(), ctx, "randn.out", CTYPE, [&] { + auto data_out = out.mutable_data_ptr(); + for (const auto i : c10::irange(out.numel())) { + data_out[i] = static_cast(dist(gen)); + } + }); + + return out; +} + +} // namespace native +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/op_randn.cpp b/kernels/portable/cpu/op_randn.cpp new file mode 100644 index 00000000000..a0732e7f177 --- /dev/null +++ b/kernels/portable/cpu/op_randn.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#include + +#include +#include + +#include + +namespace torch { +namespace executor { +namespace native { + +using executorch::aten::IntArrayRef; +using Tensor = executorch::aten::Tensor; +using ScalarType = executorch::aten::ScalarType; + +Tensor& +randn_out(KernelRuntimeContext& ctx, const IntArrayRef sizes, Tensor& out) { + (void)ctx; + + std::mt19937 gen((std::random_device())()); + std::normal_distribution dist(0.0, 1.0); + + // Resize for dynamic shape + ET_KERNEL_CHECK_MSG( + ctx, + resize_tensor(out, sizes) == Error::Ok, + InvalidArgument, + out, + "Failed to resize output tensor."); + + ET_SWITCH_FLOATHBF16_TYPES(out.scalar_type(), ctx, "randn.out", CTYPE, [&] { + auto data_out = out.mutable_data_ptr(); + for (const auto i : c10::irange(out.numel())) { + data_out[i] = static_cast(dist(gen)); + } + }); + + return out; +} + +} // namespace native +} // namespace executor +} // namespace torch diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml index ecd6a771646..feaee415f91 100644 --- a/kernels/portable/functions.yaml +++ b/kernels/portable/functions.yaml @@ -713,6 +713,18 @@ - arg_meta: null kernel_name: torch::executor::prod_out +- op: rand.out + kernels: + - arg_meta: null + kernel_name: torch::executor::rand_out + tags: nondeterministic_seeded + +- op: randn.out + kernels: + - arg_meta: null + kernel_name: torch::executor::randn_out + tags: nondeterministic_seeded + - op: reciprocal.out kernels: - arg_meta: null diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt index 6cd34773d14..4f174b5a652 100644 --- a/kernels/test/CMakeLists.txt +++ b/kernels/test/CMakeLists.txt @@ -197,6 +197,8 @@ set(all_test_sources "op_permute_copy_test.cpp" "op_pixel_shuffle_test.cpp" "op_prod_test.cpp" + "op_rand_test.cpp" + "op_randn_test.cpp" "op_reciprocal_test.cpp" "op_relu_test.cpp" "op_remainder_test.cpp" diff --git a/kernels/test/op_linear_test.cpp b/kernels/test/op_linear_test.cpp index d894c5a818a..0ad5790a550 100644 
--- a/kernels/test/op_linear_test.cpp +++ b/kernels/test/op_linear_test.cpp @@ -18,7 +18,8 @@ #include #include -using namespace ::testing; +namespace { + using executorch::aten::ArrayRef; using executorch::aten::Scalar; using executorch::aten::ScalarType; @@ -31,7 +32,15 @@ class OpLinearOutTest : public OperatorTest { return torch::executor::aten::linear_outf(context_, self, mat2, {}, out); } - template + Tensor& op_linear_out( + const Tensor& self, + const Tensor& mat2, + const Tensor& bias, + Tensor& out) { + return torch::executor::aten::linear_outf(context_, self, mat2, bias, out); + } + + template void test_dtype() { TensorFactory tf; @@ -43,16 +52,16 @@ class OpLinearOutTest : public OperatorTest { } } - // matmul gives 32 * 2 * 3 = 192 - Tensor x = tf.full({3, 32}, 2); - Tensor y = tf.full({5, 32}, 3); + // matmul gives 19 * 2 * 3 = 114 + Tensor x = tf.full({3, 19}, 2); + Tensor y = tf.full({5, 19}, 3); // Output shape should be (3, 5) Tensor out = tf.zeros({3, 5}); op_linear_out(x, y, out); - Tensor expected = tf.full({3, 5}, 192); + Tensor expected = tf.full({3, 5}, 114); EXPECT_TENSOR_EQ(out, expected); } @@ -88,6 +97,80 @@ TEST_F(OpLinearOutTest, AllDtypesSupported) { // for those types. } +TEST_F(OpLinearOutTest, BiasTest) { + TensorFactory tf; + + // Initialize input tensors. + constexpr int kReduceDim = 4; + constexpr int kDimX = 3, kDimY = 2; + constexpr int kValueX = 1; + constexpr int kValueY = 2; + constexpr int kValueBias0 = 4, kValueBias1 = 7; + const Tensor x = tf.full({kDimX, kReduceDim}, kValueX); + const Tensor y = tf.full({kDimY, kReduceDim}, kValueY); + const Tensor b = tf.make({kDimY}, {kValueBias0, kValueBias1}); + // Output matrix is also empty + Tensor out = tf.zeros({kDimX, kDimY}); + // Initialize expected tensor. 
+ constexpr int kValueExpected0 = kValueX * kValueY * kReduceDim + kValueBias0; + constexpr int kValueExpected1 = kValueX * kValueY * kReduceDim + kValueBias1; + // Check that the bias is added to the correct position in the output matrix. + const Tensor expected = tf.make( + {kDimX, kDimY}, + {kValueExpected0, + kValueExpected1, + kValueExpected0, + kValueExpected1, + kValueExpected0, + kValueExpected1}); + + EXPECT_TENSOR_EQ(op_linear_out(x, y, b, out), expected); +} + +TEST_F(OpLinearOutTest, BiasBroadcastTest) { + TensorFactory tf; + + // Initialize input tensors. + constexpr int kReduceDim = 4; + constexpr int kDimX = 3, kDimY = 5; + constexpr int kValueX = 1; + constexpr int kValueY = 2; + constexpr int kValueBias = 4; + const Tensor x = tf.full({kDimX, kReduceDim}, kValueX); + const Tensor y = tf.full({kDimY, kReduceDim}, kValueY); + const Tensor b = tf.full({1}, kValueBias); + // Output matrix is also empty + Tensor out = tf.zeros({kDimX, kDimY}); + // Initialize expected tensor. + constexpr int kValueExpected = kValueX * kValueY * kReduceDim + kValueBias; + const Tensor expected = tf.full({kDimX, kDimY}, kValueExpected); + + EXPECT_TENSOR_EQ(op_linear_out(x, y, b, out), expected); +} + +TEST_F(OpLinearOutTest, BiasDtypeMismatch) { + TensorFactory tf; + TensorFactory tf_bias; + + // Initialize input tensors. + constexpr int kReduceDim = 4; + constexpr int kDimX = 3, kDimY = 5; + constexpr int kValueX = 1; + constexpr int kValueY = 2; + constexpr int kValueBias = 4; + Tensor x = tf.full({kDimX, kReduceDim}, kValueX); + Tensor y = tf.full({kDimY, kReduceDim}, kValueY); + // Same size as output. + Tensor b = tf_bias.full({kDimY}, kValueBias); + // Output matrix is also empty + Tensor out = tf.zeros({kDimX, kDimY}); + // Initialize expected tensor. 
+ constexpr int kValueExpected = kValueX * kValueY * kReduceDim + kValueBias; + Tensor expected = tf.full({kDimX, kDimY}, kValueExpected); + + ET_EXPECT_KERNEL_FAILURE(context_, op_linear_out(x, y, b, out)); +} + TEST_F(OpLinearOutTest, EmptyInputWithEmptyOutTensorPasses) { TensorFactory tf; @@ -297,5 +380,4 @@ TEST_F(OpLinearOutTest, DynamicShapeUnbound) { Tensor ret = op_linear_out(x, y, out); EXPECT_TENSOR_CLOSE(out, expected_result); } - -// TODO: support and test bias +} // namespace diff --git a/kernels/test/op_rand_test.cpp b/kernels/test/op_rand_test.cpp new file mode 100644 index 00000000000..7450ed6a242 --- /dev/null +++ b/kernels/test/op_rand_test.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include // Declares the operator +#include +#include +#include +#include + +#include + +#include +#include + +using executorch::aten::IntArrayRef; +using executorch::aten::ScalarType; +using executorch::aten::Tensor; +using torch::executor::testing::TensorFactory; + +class OpRandTest : public OperatorTest { + protected: + void op_rand_out(const IntArrayRef sizes, Tensor& out) { + torch::executor::aten::rand_outf(context_, sizes, out); + } + + template + void test_rand(std::vector& sizes) { + TensorFactory tf; + + // Tensor factory wants int32 sizes, op kernel wants int64. + std::vector sizes_i32; + std::transform( + sizes.begin(), + sizes.end(), + std::back_inserter(sizes_i32), + [](int64_t s) { return static_cast(s); }); + Tensor out = tf.zeros(sizes_i32); + + IntArrayRef sizes_ref(sizes.data(), sizes.size()); + op_rand_out(sizes_ref, out); + + // Check mean and standard deviation. To avoid flaky CI, test pretty + // loosely. 
+ auto out_data = out.const_data_ptr(); + double mean = + std::accumulate( + out_data, + out_data + out.numel(), + 0.0, + [](double acc, CTYPE n) { return acc + static_cast(n); }) / + out.numel(); + double var = std::accumulate( + out_data, + out_data + out.numel(), + 0.0, + [=](double acc, CTYPE n) { + return acc + std::pow(static_cast(n) - mean, 2); + }) / + out.numel(); + auto stdev = std::sqrt(var); + + // These are very rough thresholds. A better test implementation would + // probably do a proper statistical test to compare the generated empirical + // data to the reference distribution, but this should do. + + // Expected mean is 0.5 + EXPECT_NEAR(mean, 0.5, 5.0 / std::sqrt(out.numel())); + // Expected stdev is 1/sqrt(12) ~= 0.289 + EXPECT_NEAR(stdev, 1.0 / std::sqrt(12), 0.1); + EXPECT_GT(stdev, 0); + } +}; + +TEST_F(OpRandTest, SmokeTest) { + std::vector sizes = {2, 3, 4, 128}; + +#define TEST_ENTRY(ctype, dtype) test_rand(sizes); + ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY); +#undef TEST_ENTRY +} + +TEST_F(OpRandTest, Rank) { + std::vector sizes = {1024}; + + for (int64_t i = 0; i < 4; i++) { + sizes.push_back(i + 1); + test_rand(sizes); + } +} diff --git a/kernels/test/op_randn_test.cpp b/kernels/test/op_randn_test.cpp new file mode 100644 index 00000000000..41456584e91 --- /dev/null +++ b/kernels/test/op_randn_test.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include // Declares the operator +#include +#include +#include +#include +#include + +#include + +#include +#include + +using executorch::aten::IntArrayRef; +using executorch::aten::ScalarType; +using executorch::aten::Tensor; +using torch::executor::testing::TensorFactory; + +class OpRandnTest : public OperatorTest { + protected: + void op_randn_out(const IntArrayRef sizes, Tensor& out) { + torch::executor::aten::randn_outf(context_, sizes, out); + } + + template + void test_randn(std::vector& sizes) { + TensorFactory tf; + + // Tensor factory wants int32 sizes, op kernel wants int64. + std::vector sizes_i32; + std::transform( + sizes.begin(), + sizes.end(), + std::back_inserter(sizes_i32), + [](int64_t s) { return static_cast(s); }); + Tensor out = tf.zeros(sizes_i32); + + IntArrayRef sizes_ref(sizes.data(), sizes.size()); + op_randn_out(sizes_ref, out); + + // Check mean and standard deviation. To avoid flaky CI, test pretty + // loosely. + auto out_data = out.const_data_ptr(); + double mean = + std::accumulate( + out_data, + out_data + out.numel(), + 0.0, + [](double acc, CTYPE n) { return acc + static_cast(n); }) / + out.numel(); + double var = std::accumulate( + out_data, + out_data + out.numel(), + 0.0, + [=](double acc, CTYPE n) { + return acc + std::pow(static_cast(n) - mean, 2); + }) / + out.numel(); + auto stdev = std::sqrt(var); + + // These are very rough thresholds. A better test implementation would + // probably do a proper statistical test to compare the generated empirical + // data to the reference distribution, but this should do. 
+ EXPECT_LE(std::abs(mean), 5.0 / std::sqrt(out.numel())); + EXPECT_LE(std::abs(stdev - 1.0), 0.1); + EXPECT_GT(stdev, 0); + } +}; + +TEST_F(OpRandnTest, SmokeTest) { + std::vector sizes = {2, 3, 4, 128}; + +#define TEST_ENTRY(ctype, dtype) test_randn(sizes); + ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY); +#undef TEST_ENTRY +} + +TEST_F(OpRandnTest, Rank) { + std::vector sizes = {1024}; + + for (int64_t i = 0; i < 4; i++) { + sizes.push_back(i + 1); + test_randn(sizes); + } +} diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl index c1824674fd4..bde3b8632b0 100644 --- a/kernels/test/targets.bzl +++ b/kernels/test/targets.bzl @@ -285,6 +285,8 @@ def define_common_targets(): _common_op_test("op_pixel_unshuffle_test", ["aten", "portable"]) _common_op_test("op_pow_test", ["aten", "portable"]) _common_op_test("op_prod_test", ["aten", "portable"]) + _common_op_test("op_rand_test", ["aten", "portable"]) + _common_op_test("op_randn_test", ["aten", "portable"]) _common_op_test("op_reciprocal_test", ["aten", "portable"]) _common_op_test("op_relu_test", ["aten", "portable"]) _common_op_test("op_remainder_test", ["aten", "portable"]) diff --git a/pytest.ini b/pytest.ini index 4dd7f4353d2..557a307bdf2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -47,6 +47,8 @@ addopts = --ignore=backends/xnnpack/test/ops/test_sdpa.py backends/xnnpack/test/passes backends/xnnpack/test/serialization + # backends/apple/coreml + backends/apple/coreml/test # extension/ extension/llm/modules/test extension/llm/export diff --git a/runtime/backend/backend_init_context.h b/runtime/backend/backend_init_context.h index 71c5182f401..5a4b70e0dbc 100644 --- a/runtime/backend/backend_init_context.h +++ b/runtime/backend/backend_init_context.h @@ -25,8 +25,14 @@ class BackendInitContext final { const char* method_name = nullptr, const NamedDataMap* named_data_map = nullptr) : runtime_allocator_(runtime_allocator), +#ifdef ET_EVENT_TRACER_ENABLED + event_tracer_(event_tracer), +#else + 
event_tracer_(nullptr), +#endif method_name_(method_name), - named_data_map_(named_data_map) {} + named_data_map_(named_data_map) { + } /** Get the runtime allocator passed from Method. It's the same runtime * executor used by the standard executor runtime and the life span is the diff --git a/runtime/core/portable_type/c10/c10/targets.bzl b/runtime/core/portable_type/c10/c10/targets.bzl index 4088110246d..827a63d2cef 100644 --- a/runtime/core/portable_type/c10/c10/targets.bzl +++ b/runtime/core/portable_type/c10/c10/targets.bzl @@ -65,7 +65,6 @@ def define_common_targets(): fbcode_exported_deps = ([ "//caffe2:aten-headers-cpu", "//caffe2:generated-config-header", - "//caffe2:torch_standalone_headers", "//caffe2/c10:c10_headers", ] + select({ "DEFAULT": [], @@ -84,7 +83,6 @@ def define_common_targets(): ] + get_sleef_preprocessor_flags(), xplat_exported_deps = [ "//xplat/caffe2:aten_header", - "//xplat/caffe2:torch_standalone_headers", "//xplat/caffe2/c10:c10_headers", ] + ["//xplat/caffe2:ovrsource_aten_Config.h" if is_arvr_mode() else "//xplat/caffe2:generated_aten_config_header",], exported_preprocessor_flags = select({ diff --git a/scripts/build_apple_frameworks.sh b/scripts/build_apple_frameworks.sh index a43deed9ab7..fd457d9f21c 100755 --- a/scripts/build_apple_frameworks.sh +++ b/scripts/build_apple_frameworks.sh @@ -65,11 +65,6 @@ liboptimized_native_cpu_ops_lib.a,\ libportable_kernels.a,\ :" -FRAMEWORK_KERNELS_PORTABLE="kernels_portable:\ -libportable_kernels.a,\ -libportable_ops_lib.a,\ -:" - FRAMEWORK_KERNELS_QUANTIZED="kernels_quantized:\ libquantized_kernels.a,\ libquantized_ops_lib.a,\ @@ -86,7 +81,6 @@ usage() { echo " --custom Only build the Custom kernels." echo " --mps Only build the Metal Performance Shaders backend." echo " --optimized Only build the Optimized kernels." - echo " --portable Only build the Portable kernels." echo " --quantized Only build the Quantized kernels." echo " --xnnpack Only build the XNNPACK backend." 
echo @@ -104,7 +98,6 @@ set_cmake_options_override() { "-DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF" "-DEXECUTORCH_BUILD_MPS=OFF" "-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=OFF" - "-DEXECUTORCH_BUILD_PORTABLE_OPS=OFF" "-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF" "-DEXECUTORCH_BUILD_XNNPACK=OFF" ) @@ -135,7 +128,6 @@ for arg in "$@"; do --custom) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_CUSTOM" ;; --mps) set_cmake_options_override "EXECUTORCH_BUILD_MPS" ;; --optimized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" ;; - --portable) set_cmake_options_override "EXECUTORCH_BUILD_PORTABLE_OPS" ;; --quantized) set_cmake_options_override "EXECUTORCH_BUILD_KERNELS_QUANTIZED" ;; --xnnpack) set_cmake_options_override "EXECUTORCH_BUILD_XNNPACK" ;; *) @@ -240,7 +232,6 @@ for mode in "${MODES[@]}"; do append_framework_flag "EXECUTORCH_BUILD_XNNPACK" "$FRAMEWORK_BACKEND_XNNPACK" "$mode" append_framework_flag "EXECUTORCH_BUILD_KERNELS_CUSTOM" "$FRAMEWORK_KERNELS_CUSTOM" "$mode" append_framework_flag "EXECUTORCH_BUILD_KERNELS_OPTIMIZED" "$FRAMEWORK_KERNELS_OPTIMIZED" "$mode" - append_framework_flag "EXECUTORCH_BUILD_PORTABLE_OPS" "$FRAMEWORK_KERNELS_PORTABLE" "$mode" append_framework_flag "EXECUTORCH_BUILD_KERNELS_QUANTIZED" "$FRAMEWORK_KERNELS_QUANTIZED" "$mode" cd "${OUTPUT_DIR}" diff --git a/scripts/test_ios.sh b/scripts/test_ios.sh index 245f7b06f7a..b2b3ce94e35 100755 --- a/scripts/test_ios.sh +++ b/scripts/test_ios.sh @@ -60,10 +60,6 @@ say "Installing CoreML Backend Requirements" ./backends/apple/coreml/scripts/install_requirements.sh -say "Installing MPS Backend Requirements" - -./backends/apple/mps/install_requirements.sh - say "Exporting Models" python3 -m examples.portable.scripts.export --model_name="$MODEL_NAME" diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl index 4e379942c52..a731ce5c674 100644 --- 
a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl +++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl @@ -973,6 +973,22 @@ ATEN_OPS = ( "//executorch/kernels/portable/cpu/util:reduce_util", ], ), + op_target( + name = "op_rand", + deps = [ + ":scalar_utils", + "//executorch/runtime/core/exec_aten/util:scalar_type_util", + "//executorch/runtime/core/exec_aten/util:tensor_util", + ] + ), + op_target( + name = "op_randn", + deps = [ + ":scalar_utils", + "//executorch/runtime/core/exec_aten/util:scalar_type_util", + "//executorch/runtime/core/exec_aten/util:tensor_util", + ] + ), op_target( name = "op_reciprocal", deps = [ diff --git a/third-party/TARGETS b/third-party/TARGETS index c80bd9448b3..5dd76288a9b 100644 --- a/third-party/TARGETS +++ b/third-party/TARGETS @@ -81,18 +81,6 @@ runtime.python_binary( _is_external_target = True, ) -runtime.python_binary( - name = "gen_executorch", - main_module = "torchgen.gen_executorch", - visibility = [ - "PUBLIC", - ], - deps = [ - ":torchgen", - ], - _is_external_target = True, -) - runtime.filegroup( name = "aten_src_path", srcs = [ diff --git a/tools/cmake/Codegen.cmake b/tools/cmake/Codegen.cmake index ab616a5188d..f1dac84de43 100644 --- a/tools/cmake/Codegen.cmake +++ b/tools/cmake/Codegen.cmake @@ -91,8 +91,9 @@ function(generate_bindings_for_kernels) OUTPUT_STRIP_TRAILING_WHITESPACE ) file(GLOB_RECURSE _torchgen_srcs "${torchgen-out}/*.py") + # Not using module executorch.codegen.gen because it's not installed yet. set(_gen_command - "${PYTHON_EXECUTABLE}" -m torchgen.gen_executorch + "${PYTHON_EXECUTABLE}" -m codegen.gen --source-path=${EXECUTORCH_ROOT}/codegen --install-dir=${_out_dir} --tags-path=${torchgen-out}/packaged/ATen/native/tags.yaml --aten-yaml-path=${torchgen-out}/packaged/ATen/native/native_functions.yaml