diff --git a/backends/arm/operator_support/to_copy_support.py b/backends/arm/operator_support/to_copy_support.py
index 343d949c244..7f27d0b5b36 100644
--- a/backends/arm/operator_support/to_copy_support.py
+++ b/backends/arm/operator_support/to_copy_support.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
+import copy
 import logging
 
 import torch
@@ -42,7 +43,9 @@ def _merge_supported_types(
         dtypes1: SupportedTypeDict,
         dtypes2: SupportedTypeDict,
     ) -> SupportedTypeDict:
-        merged_dtypes = dtypes1
+        merged_dtypes = copy.deepcopy(
+            dtypes1
+        )  # Use deepcopy to avoid unintentionally modifying SUPPORTED_INT_TYPES
         for k, v in dtypes2.items():
             merged_dtypes[k] = merged_dtypes.get(k, []) + v
         return merged_dtypes
diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py
index 9d873f30ce9..9fcd65dc957 100644
--- a/backends/arm/test/ops/test_to_copy.py
+++ b/backends/arm/test/ops/test_to_copy.py
@@ -12,7 +12,10 @@
 import torch
 
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineMI
+from executorch.backends.arm.test.tester.test_pipeline import (
+    OpNotSupportedPipeline,
+    TosaPipelineMI,
+)
 
 input_t1 = Tuple[torch.Tensor]  # Input x
 
@@ -31,11 +34,14 @@ def forward(self, x: torch.Tensor):
 
 Only test unquantized graphs as explicit casting of dtypes messes with the
 quantization.
+However, the model being exported may contain explicit casts to floating-point
+dtypes. Such casts, or their decompositions, should be rejected during
+partitioning. This case is covered by test_copy_tosa_BI below.
 
 Note: This is also covered by test_scalars.py.
 """
 
-_TO_COPY_TEST_DATA = {
+_TO_COPY_TEST_DATA_MI = {
     "rand_fp16": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float16), torch.float32),
     "rand_fp32": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float32), torch.float16),
     "rand_int8": lambda: (
@@ -53,7 +59,7 @@ def forward(self, x: torch.Tensor):
 }
 
 
-@common.parametrize("test_data", _TO_COPY_TEST_DATA)
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_MI)
 def test_copy_tosa_MI(test_data: Tuple):
     test_tensor, new_dtype = test_data()
 
@@ -64,3 +70,49 @@ def test_copy_tosa_MI(test_data: Tuple):
         exir_op=[],
     )
     pipeline.run()
+
+
+"""
+Casting operations that output floating-point dtypes should be rejected under the BI
+profile, rather than introducing an invalid dtype into the TOSA graph.
+For example, x.to(dtype=torch.float32) will eventually be lowered to
+exir_ops.edge.dim_order_ops._to_dim_order_copy.default. We should reject this operation
+in ToCopySupported.is_node_tosa_supported() before it goes into the delegated graph.
+"""
+_TO_COPY_TEST_DATA_BI = {
+    "rand_int8_fp32": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8),
+        torch.float32,
+    ),
+    "rand_int16_fp32": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int16),
+        torch.float32,
+    ),
+    "rand_int32_fp32": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32),
+        torch.float32,
+    ),
+    "rand_int32_fp16": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32),
+        torch.float16,
+    ),
+    "rand_int32_bf16": lambda: (
+        torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32),
+        torch.bfloat16,
+    ),
+}
+
+
+@common.parametrize("test_data", _TO_COPY_TEST_DATA_BI)
+def test_copy_tosa_BI(test_data: Tuple):
+    test_tensor, new_dtype = test_data()
+
+    pipeline = OpNotSupportedPipeline[input_t1](
+        Cast(new_dtype),
+        (test_tensor,),
+        {
+            "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default": 1
+        },
+        quantize=True,
+    )
+    pipeline.run()