From 567b9ba82f87bda7c31a7445cc366bc7a4724757 Mon Sep 17 00:00:00 2001
From: morelos
Date: Thu, 3 Jul 2025 11:17:10 -0700
Subject: [PATCH 1/2] [ET-VK] Lowering ExecuTorch tensor dtypes to Vulkan tensor dtypes to enable 64-bit outputs

# Context

We are aligning with other delegates in how they handle 64-bit output dtypes.
Previously we only had support for 64-bit integers; this change also adds
support for doubles. We convert the values in place so that the output path
stays performant and no extra conversion buffer is needed.

# Changes

Add a conversion on the output path so that the 32-bit values produced by
Vulkan are widened in place back to the 64-bit dtype expected by ExecuTorch.

Differential Revision: [D77746134](https://our.internmc.facebook.com/intern/diff/D77746134/)

[ghstack-poisoned]
---
 backends/vulkan/runtime/VulkanBackend.cpp | 29 +++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
index 7077a9df59c..47134d7cf51 100644
--- a/backends/vulkan/runtime/VulkanBackend.cpp
+++ b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -599,12 +599,41 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     if (compute_graph->val_is_tensor(oref)) {
       VK_CHECK_COND(args[o]->isTensor());
       maybe_resize_output(compute_graph, i, args[o]->toTensor());
+
+      // Get the Vulkan tensor dtype and ExecuTorch tensor dtype
+      vTensorPtr vulkan_tensor = compute_graph->get_tensor(oref);
+      vkapi::ScalarType vulkan_dtype = vulkan_tensor->dtype();
+      executorch::aten::ScalarType et_dtype =
+          args[o]->toTensor().scalar_type();
+
       // args holds inputs directly followed by outputs, so the i'th output
       // for compute_graph corresponds to the o'th arg
       compute_graph->copy_from_staging(
           compute_graph->outputs()[i].staging,
           args[o]->toTensor().mutable_data_ptr(),
           args[o]->toTensor().numel());
+
+      // Handle dtype conversion between Vulkan and ExecuTorch (in-place)
+      if (vulkan_dtype == vkapi::kFloat &&
+          et_dtype == executorch::aten::ScalarType::Double) {
+        // Convert float32 to float64 in-place (backwards to avoid overwriting)
+        double* data_64 = args[o]->toTensor().mutable_data_ptr<double>();
+        const float* data_32 = args[o]->toTensor().const_data_ptr<float>();
+        for (size_t j = args[o]->toTensor().numel() - 1; j >= 0; --j) {
+          data_64[j] = static_cast<double>(data_32[j]);
+          if (j == 0) break; // Prevent underflow for size_t
+        }
+      } else if (
+          vulkan_dtype == vkapi::kInt &&
+          et_dtype == executorch::aten::ScalarType::Long) {
+        // Convert int32 to int64 in-place (backwards to avoid overwriting)
+        int64_t* data_64 = args[o]->toTensor().mutable_data_ptr<int64_t>();
+        const int32_t* data_32 = args[o]->toTensor().const_data_ptr<int32_t>();
+        for (size_t j = args[o]->toTensor().numel() - 1; j >= 0; --j) {
+          data_64[j] = static_cast<int64_t>(data_32[j]);
+          if (j == 0) break; // Prevent underflow for size_t
+        }
+      }
     }
     // TensorRef values represent constant tensors which will not have been
     // modified by the graph execution. Therefore, if a constant tensor is

From 9bbbc435ef800ae9fe62b2f42b3fb41fa4fd4a5e Mon Sep 17 00:00:00 2001
From: morelos
Date: Thu, 3 Jul 2025 11:27:40 -0700
Subject: [PATCH 2/2] Update on "[ET-VK] Lowering ExecuTorch tensor dtypes to Vulkan tensor dtypes to enable 64-bit outputs"

# Context

We are aligning with other delegates in how they handle 64-bit output dtypes.
Previously we only had support for 64-bit integers; this change also adds
support for doubles. We convert the values in place so that the output path
stays performant and no extra conversion buffer is needed.

# Changes

Add a conversion on the output path so that the 32-bit values produced by
Vulkan are widened in place back to the 64-bit dtype expected by ExecuTorch.
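For illustration, below is a minimal standalone sketch of the in-place widening idea this diff applies to the output buffer: the first `numel` 32-bit values sit at the front of a buffer sized for `numel` 64-bit values, and walking backwards guarantees no source element is overwritten before it has been read. The `widen_in_place` helper and the buffer setup are hypothetical and are not part of the delegate code.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Widen `numel` front-packed `From` values to `To` values inside the same
// buffer, which must hold at least numel * sizeof(To) bytes. Iterating from
// the back ensures a 32-bit source element is read before the wider
// destination slot covering those bytes is written.
template <typename To, typename From>
void widen_in_place(unsigned char* buffer, std::size_t numel) {
  static_assert(sizeof(To) >= sizeof(From), "this helper only widens");
  for (std::size_t j = numel; j > 0; --j) {
    From src;
    std::memcpy(&src, buffer + (j - 1) * sizeof(From), sizeof(From));
    const To dst = static_cast<To>(src);
    std::memcpy(buffer + (j - 1) * sizeof(To), &dst, sizeof(To));
  }
}

int main() {
  // Buffer sized for 4 int64_t outputs, currently holding 4 int32_t values
  // at the front, as the output tensor would after copy_from_staging().
  std::vector<unsigned char> buf(4 * sizeof(int64_t));
  const int32_t staged[4] = {10, -20, 30, -40};
  std::memcpy(buf.data(), staged, sizeof(staged));

  widen_in_place<int64_t, int32_t>(buf.data(), 4);

  int64_t out[4];
  std::memcpy(out, buf.data(), sizeof(out));
  assert(out[0] == 10 && out[3] == -40);
  return 0;
}
```

Counting down with `j > 0` and indexing `j - 1` is just one way to express the reverse walk; the diff instead starts at `numel() - 1` and breaks explicitly at `j == 0` to avoid unsigned underflow. Both enforce the same ordering constraint.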
Differential Revision: [D77746134](https://our.internmc.facebook.com/intern/diff/D77746134/)

cc SS-JIA manuelcandales cbilgin

[ghstack-poisoned]
---
 backends/vulkan/runtime/VulkanBackend.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
index 47134d7cf51..d40a5f3ae44 100644
--- a/backends/vulkan/runtime/VulkanBackend.cpp
+++ b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -616,22 +616,26 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
       // Handle dtype conversion between Vulkan and ExecuTorch (in-place)
       if (vulkan_dtype == vkapi::kFloat &&
           et_dtype == executorch::aten::ScalarType::Double) {
-        // Convert float32 to float64 in-place (backwards to avoid overwriting)
+        // Convert float32 to float64 in-place (backwards to avoid
+        // overwriting)
         double* data_64 = args[o]->toTensor().mutable_data_ptr<double>();
         const float* data_32 = args[o]->toTensor().const_data_ptr<float>();
         for (size_t j = args[o]->toTensor().numel() - 1; j >= 0; --j) {
           data_64[j] = static_cast<double>(data_32[j]);
-          if (j == 0) break; // Prevent underflow for size_t
+          if (j == 0)
+            break; // Prevent underflow for size_t
         }
       } else if (
           vulkan_dtype == vkapi::kInt &&
           et_dtype == executorch::aten::ScalarType::Long) {
         // Convert int32 to int64 in-place (backwards to avoid overwriting)
         int64_t* data_64 = args[o]->toTensor().mutable_data_ptr<int64_t>();
-        const int32_t* data_32 = args[o]->toTensor().const_data_ptr<int32_t>();
+        const int32_t* data_32 =
+            args[o]->toTensor().const_data_ptr<int32_t>();
         for (size_t j = args[o]->toTensor().numel() - 1; j >= 0; --j) {
           data_64[j] = static_cast<int64_t>(data_32[j]);
-          if (j == 0) break; // Prevent underflow for size_t
+          if (j == 0)
+            break; // Prevent underflow for size_t
         }
       }
     }