diff --git a/codegen/tools/targets.bzl b/codegen/tools/targets.bzl index 39de8fcb482..6ea43951385 100644 --- a/codegen/tools/targets.bzl +++ b/codegen/tools/targets.bzl @@ -28,6 +28,7 @@ def define_common_targets(is_fbcode = False): main_module = "executorch.codegen.tools.gen_oplist", deps = [ ":gen_oplist_lib", + "//executorch/exir:lib", ], preload_deps = [] if runtime.is_oss else ["//executorch/codegen/tools:selective_build"], # TODO(larryliu0820) :selective_build doesn't build in OSS yet package_style = "inplace", diff --git a/examples/selective_build/CMakeLists.txt b/examples/selective_build/CMakeLists.txt index a37b4362d78..487ecdec982 100644 --- a/examples/selective_build/CMakeLists.txt +++ b/examples/selective_build/CMakeLists.txt @@ -61,6 +61,14 @@ option(EXECUTORCH_SELECT_OPS_LIST "Register a list of ops, separated by comma" option(EXECUTORCH_SELECT_ALL_OPS "Whether to register all ops defined in portable kernel library." OFF ) + +# Option to enable dtype selective build +option(EXECUTORCH_SELECT_OPS_FROM_MODEL "Enable op selection from pte during build." OFF +) + +# Option to enable dtype selective build +option(EXECUTORCH_DTYPE_SELECTIVE_BUILD "Enable dtype selection during build." 
OFF +) # ------------------------------- OPTIONS END -------------------------------- # @@ -108,16 +116,32 @@ gen_selected_ops( "${EXECUTORCH_SELECT_OPS_LIST}" INCLUDE_ALL_OPS "${EXECUTORCH_SELECT_ALL_OPS}" + OPS_FROM_MODEL + "${EXECUTORCH_SELECT_OPS_FROM_MODEL}" + DTYPE_SELECT + "${EXECUTORCH_DTYPE_SELECTIVE_BUILD}" ) generate_bindings_for_kernels( - LIB_NAME "select_build_lib" FUNCTIONS_YAML - ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml CUSTOM_OPS_YAML + LIB_NAME + "select_build_lib" + FUNCTIONS_YAML + ${EXECUTORCH_ROOT}/kernels/portable/functions.yaml + CUSTOM_OPS_YAML "${_custom_ops_yaml}" + DTYPE_SELECT + "${EXECUTORCH_DTYPE_SELECTIVE_BUILD}" ) gen_operators_lib( - LIB_NAME "select_build_lib" KERNEL_LIBS ${_kernel_lib} DEPS executorch_core + LIB_NAME + "select_build_lib" + KERNEL_LIBS + ${_kernel_lib} + DEPS + executorch_core + DTYPE_SELECT + "${EXECUTORCH_DTYPE_SELECTIVE_BUILD}" ) list(TRANSFORM _executor_runner__srcs PREPEND "${EXECUTORCH_ROOT}/") diff --git a/examples/selective_build/test_selective_build.sh b/examples/selective_build/test_selective_build.sh index 324a4fe27a5..f5a598011a3 100644 --- a/examples/selective_build/test_selective_build.sh +++ b/examples/selective_build/test_selective_build.sh @@ -94,6 +94,7 @@ test_cmake_select_all_ops() { rm -rf ${build_dir} retry cmake -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_SELECT_ALL_OPS=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -B${build_dir} \ @@ -118,11 +119,9 @@ test_cmake_select_ops_in_list() { # set MAX_KERNEL_NUM=22: 19 primops, add, mul rm -rf ${build_dir} retry cmake -DCMAKE_BUILD_TYPE=Release \ - -DMAX_KERNEL_NUM=22 \ - -DEXECUTORCH_SELECT_OPS_LIST="aten::convolution.out,\ -aten::_native_batch_norm_legit_no_training.out,aten::hardtanh.out,aten::add.out,\ -aten::mean.out,aten::view_copy.out,aten::permute_copy.out,aten::addmm.out,\ -aten,aten::clone.out" \ + -DEXECUTORCH_SELECT_OPS_FROM_MODEL="./mv2.pte" \ + 
-DEXECUTORCH_OPTIMIZE_SIZE=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -B${build_dir} \ @@ -146,6 +145,7 @@ test_cmake_select_ops_in_yaml() { rm -rf ${build_dir} retry cmake -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_SELECT_OPS_YAML=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DCMAKE_INSTALL_PREFIX=cmake-out \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ -B${build_dir} \ @@ -161,6 +161,30 @@ test_cmake_select_ops_in_yaml() { rm "./custom_ops_1.pte" } +test_cmake_select_ops_and_dtype() { + echo "Exporting MobilenetV2" + ${PYTHON_EXECUTABLE} -m examples.portable.scripts.export --model_name="mv2" + local example_dir=examples/selective_build + local build_dir=cmake-out/${example_dir} + retry cmake -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_SELECT_OPS_FROM_MODEL="./mv2.pte" \ + -DEXECUTORCH_OPTIMIZE_SIZE=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ + -DCMAKE_INSTALL_PREFIX=cmake-out \ + -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ + -B${build_dir} \ + ${example_dir} + + echo "Building ${example_dir}" + cmake --build ${build_dir} -j9 --config Release + + echo 'Running selective build test' + ${build_dir}/selective_build_test --model_path="./mv2.pte" + + echo "Removing mv2.pte" + rm "./mv2.pte" +} + if [[ -z $BUCK ]]; then BUCK=buck2 @@ -174,9 +198,10 @@ fi if [[ $1 == "cmake" ]]; then cmake_install_executorch_lib - test_cmake_select_all_ops +# test_cmake_select_all_ops test_cmake_select_ops_in_list - test_cmake_select_ops_in_yaml +# test_cmake_select_ops_in_yaml +# test_cmake_select_ops_and_dtype elif [[ $1 == "buck2" ]]; then test_buck2_select_all_ops diff --git a/mv2_schema.yaml b/mv2_schema.yaml new file mode 100644 index 00000000000..aa74b94c305 --- /dev/null +++ b/mv2_schema.yaml @@ -0,0 +1,71 @@ +build_features: [] +custom_classes: [] +et_kernel_metadata: + aten::_native_batch_norm_legit_no_training.out: + - v1/6;0,1,2,3|6;0|6;0|6;0|6;0|6;0,1,2,3|6;0|6;0|6;0,1,2,3 + aten::add.out: + - 
v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::addmm.out: + - v1/6;0|6;0,1|6;0,1|6;0,1|6;0,1 + aten::clone.out: + - v1/6;0,1|6;0,1|6;0,1 + aten::convolution.out: + - v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::hardtanh.out: + - v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::mean.out: + - v1/6;0,1,2,3|6;0,1,2,3|6;0,1,2,3 + aten::permute_copy.out: + - v1/6;0,1|6;0,1|6;0,1 +include_all_non_op_selectives: false +include_all_operators: false +kernel_metadata: {} +operators: + aten::_native_batch_norm_legit_no_training.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::add.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::addmm.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::clone.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::convolution.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::hardtanh.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::mean.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::permute_copy.out: + debug_info: + - mv2.pte + include_all_overloads: false + is_root_operator: true + is_used_for_training: true diff --git a/pytest.ini b/pytest.ini index 557a307bdf2..d1a8450b6f4 100644 --- a/pytest.ini +++ b/pytest.ini @@ -58,6 +58,7 @@ addopts = runtime # Tools codegen/test + codegen/tools/test tools/cmake # test TODO: fix these tests # test/end2end/test_end2end.py diff --git a/test/build_size_test.sh b/test/build_size_test.sh index d020ab58c95..2baafcfcfe3 100644 --- a/test/build_size_test.sh +++ b/test/build_size_test.sh @@ -29,6 +29,7 
@@ cmake_install_executorch_lib() { -DCMAKE_BUILD_TYPE=Release \ -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ -DEXECUTORCH_OPTIMIZE_SIZE=ON \ + -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ ${EXTRA_BUILD_ARGS} \ -Bcmake-out . @@ -44,11 +45,9 @@ test_cmake_size_test() { echo "Build size test" cmake --build cmake-out/test -j9 --config Release - echo 'ExecuTorch with no ops binary size, unstripped:' - ls -al cmake-out/test/size_test - - echo 'ExecuTorch with portable ops binary size, unstripped:' - ls -al cmake-out/test/size_test_all_ops + strip cmake-out/test/size_test + strip cmake-out/test/size_test_all_ops + ls -lah cmake-out/test/ } if [[ -z $PYTHON_EXECUTABLE ]]; then diff --git a/tools/cmake/Codegen.cmake b/tools/cmake/Codegen.cmake index f1dac84de43..c4d412ceb0e 100644 --- a/tools/cmake/Codegen.cmake +++ b/tools/cmake/Codegen.cmake @@ -12,7 +12,7 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) function(gen_selected_ops) - set(arg_names LIB_NAME OPS_SCHEMA_YAML ROOT_OPS INCLUDE_ALL_OPS) + set(arg_names LIB_NAME OPS_SCHEMA_YAML ROOT_OPS INCLUDE_ALL_OPS OPS_FROM_MODEL DTYPE_SELECT) cmake_parse_arguments(GEN "" "" "${arg_names}" ${ARGN}) message(STATUS "Generating operator lib:") @@ -20,10 +20,13 @@ function(gen_selected_ops) message(STATUS " OPS_SCHEMA_YAML: ${GEN_OPS_SCHEMA_YAML}") message(STATUS " ROOT_OPS: ${GEN_ROOT_OPS}") message(STATUS " INCLUDE_ALL_OPS: ${GEN_INCLUDE_ALL_OPS}") + message(STATUS " OPS_FROM_MODEL: ${GEN_OPS_FROM_MODEL}") + message(STATUS " DTYPE_SELECT: ${GEN_DTYPE_SELECT}") set(_oplist_yaml ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}/selected_operators.yaml ) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}) file(GLOB_RECURSE _codegen_tools_srcs "${EXECUTORCH_ROOT}/codegen/tools/*.py") @@ -43,6 +46,9 @@ function(gen_selected_ops) if(GEN_INCLUDE_ALL_OPS) list(APPEND _gen_oplist_command --include_all_operators) endif() + if(GEN_OPS_FROM_MODEL) + list(APPEND _gen_oplist_command 
--model_file_path="${GEN_OPS_FROM_MODEL}") + endif() message("Command - ${_gen_oplist_command}") add_custom_command( @@ -53,6 +59,23 @@ function(gen_selected_ops) WORKING_DIRECTORY ${EXECUTORCH_ROOT} ) + if(GEN_DTYPE_SELECT) + set(_opvariant_h + ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}/selected_op_variants.h + ) + set(_gen_opvariant_command "${PYTHON_EXECUTABLE}" -m codegen.tools.gen_selected_op_variants + --yaml-file=${_oplist_yaml} + --output-dir=${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}/ + ) + message("Command - ${_gen_opvariant_command}") + add_custom_command( + COMMENT "Generating selected_op_variants.h for ${GEN_LIB_NAME}" + OUTPUT ${_opvariant_h} + COMMAND ${_gen_opvariant_command} + DEPENDS ${_oplist_yaml} ${_codegen_tools_srcs} + WORKING_DIRECTORY ${EXECUTORCH_ROOT} + ) + endif() endfunction() # Codegen for registering kernels. Kernels are defined in functions_yaml and @@ -62,7 +85,7 @@ endfunction() # functions_yaml CUSTOM_OPS_YAML custom_ops_yaml ) function(generate_bindings_for_kernels) set(options ADD_EXCEPTION_BOUNDARY) - set(arg_names LIB_NAME FUNCTIONS_YAML CUSTOM_OPS_YAML) + set(arg_names LIB_NAME FUNCTIONS_YAML CUSTOM_OPS_YAML DTYPE_SELECT) cmake_parse_arguments(GEN "${options}" "${arg_names}" "" ${ARGN}) message(STATUS "Generating kernel bindings:") @@ -70,6 +93,7 @@ function(generate_bindings_for_kernels) message(STATUS " FUNCTIONS_YAML: ${GEN_FUNCTIONS_YAML}") message(STATUS " CUSTOM_OPS_YAML: ${GEN_CUSTOM_OPS_YAML}") message(STATUS " ADD_EXCEPTION_BOUNDARY: ${GEN_ADD_EXCEPTION_BOUNDARY}") + message(STATUS " DTYPE_SELECT: ${GEN_DTYPE_SELECT}") # Command to generate selected_operators.yaml from custom_ops.yaml. 
file(GLOB_RECURSE _codegen_templates "${EXECUTORCH_ROOT}/codegen/templates/*") @@ -78,6 +102,13 @@ function(generate_bindings_for_kernels) # By default selective build output is selected_operators.yaml set(_oplist_yaml ${_out_dir}/selected_operators.yaml) + # If dtype selective build is enabled, force header file to be preserved + if(GEN_DTYPE_SELECT) + set(_opvariant_h ${_out_dir}/selected_op_variants.h) + else() + set(_opvariant_h "") + endif() + # Command to codegen C++ wrappers to register custom ops to both PyTorch and # Executorch runtime. execute_process( @@ -108,6 +139,10 @@ function(generate_bindings_for_kernels) ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h ) + if(GEN_DTYPE_SELECT) + list(APPEND _gen_command_sources ${_out_dir}/selected_op_variants.h) + endif() + if(GEN_FUNCTIONS_YAML) list(APPEND _gen_command --functions-yaml-path=${GEN_FUNCTIONS_YAML}) endif() @@ -122,8 +157,9 @@ function(generate_bindings_for_kernels) COMMENT "Generating code for kernel registration" OUTPUT ${_gen_command_sources} COMMAND ${_gen_command} - DEPENDS ${_oplist_yaml} ${GEN_CUSTOM_OPS_YAML} ${GEN_FUNCTIONS_YAML} - ${_codegen_templates} ${_torchgen_srcs} + DEPENDS ${_oplist_yaml} ${_opvariant_h} ${GEN_CUSTOM_OPS_YAML} + ${GEN_FUNCTIONS_YAML} ${_codegen_templates} + ${_torchgen_srcs} WORKING_DIRECTORY ${EXECUTORCH_ROOT} ) # Make generated file list available in parent scope @@ -165,22 +201,33 @@ endfunction() # Generate a runtime lib for registering operators in Executorch function(gen_operators_lib) - set(multi_arg_names LIB_NAME KERNEL_LIBS DEPS) + set(multi_arg_names LIB_NAME KERNEL_LIBS DEPS DTYPE_SELECT) cmake_parse_arguments(GEN "" "" "${multi_arg_names}" ${ARGN}) message(STATUS "Generating operator lib:") message(STATUS " LIB_NAME: ${GEN_LIB_NAME}") message(STATUS " KERNEL_LIBS: ${GEN_KERNEL_LIBS}") message(STATUS " DEPS: ${GEN_DEPS}") + message(STATUS " DTYPE_SELECT: ${GEN_DTYPE_SELECT}") set(_out_dir ${CMAKE_CURRENT_BINARY_DIR}/${GEN_LIB_NAME}) 
add_library(${GEN_LIB_NAME}) - target_sources( - ${GEN_LIB_NAME} - PRIVATE ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp - ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h - ) + if(GEN_DTYPE_SELECT) + target_sources( + ${GEN_LIB_NAME} + PRIVATE ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp + ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h + ${_out_dir}/selected_op_variants.h + ) + else() + target_sources( + ${GEN_LIB_NAME} + PRIVATE ${_out_dir}/RegisterCodegenUnboxedKernelsEverything.cpp + ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h + ) + endif() + target_link_libraries(${GEN_LIB_NAME} PRIVATE ${GEN_DEPS}) if(GEN_KERNEL_LIBS) target_link_libraries(${GEN_LIB_NAME} PUBLIC ${GEN_KERNEL_LIBS}) @@ -188,6 +235,9 @@ function(gen_operators_lib) target_link_options_shared_lib(${GEN_LIB_NAME}) set(_generated_headers ${_out_dir}/Functions.h ${_out_dir}/NativeFunctions.h) + if(GEN_DTYPE_SELECT) + list(APPEND _generated_headers ${_out_dir}/selected_op_variants.h) + endif() set_target_properties( ${GEN_LIB_NAME} PROPERTIES PUBLIC_HEADER "${_generated_headers}" )