
Commit c88b97e

ssjia committed

Update base for Update on "[ET-VK] Add 'half' variants to some Llama operators + enable llama vulkan export with force_fp16 flag"

Title says it all!

Differential Revision: [D82234179](https://our.internmc.facebook.com/intern/diff/D82234179/)

cc manuelcandales cbilgin

[ghstack-poisoned]
1 parent 5090fdc commit c88b97e

File tree: 9 files changed (+119, -15 lines)

.github/workflows/pull.yml

Lines changed: 0 additions & 6 deletions

```diff
@@ -963,12 +963,6 @@ jobs:
           python -m examples.vulkan.export --model_name=$model --test
         done
 
-        # Test some models with the --force-fp16 flag to ensure that it works
-        fp16_models="mv2 edsr resnet18"
-        for model in $fp16_models; do
-          python -m examples.vulkan.export --model_name=$model -fp16 --test
-        done
-
 
   test-vulkan-operators-linux:
     name: test-vulkan-operators-linux
```

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 30 additions & 4 deletions

```diff
@@ -91,6 +91,30 @@ vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
   }
 }
 
+vkapi::ScalarType equivalent_scalar_type(
+    const executorch::runtime::etensor::ScalarType& et_datatype) {
+  switch (et_datatype) {
+    case executorch::runtime::etensor::ScalarType::Byte:
+      return vkapi::kByte;
+    case executorch::runtime::etensor::ScalarType::Char:
+      return vkapi::kChar;
+    case executorch::runtime::etensor::ScalarType::Int:
+      return vkapi::kInt;
+    case executorch::runtime::etensor::ScalarType::Long:
+      return vkapi::kLong;
+    case executorch::runtime::etensor::ScalarType::Half:
+      return vkapi::kHalf;
+    case executorch::runtime::etensor::ScalarType::Float:
+      return vkapi::kFloat;
+    case executorch::runtime::etensor::ScalarType::Double:
+      return vkapi::kDouble;
+    case executorch::runtime::etensor::ScalarType::Bool:
+      return vkapi::kBool;
+    default:
+      VK_THROW("Invalid etensor::ScalarType encountered!");
+  }
+}
+
 utils::StorageType get_storage_type(
     const vkgraph::VkStorageType& vk_storage_type) {
   switch (vk_storage_type) {
@@ -599,10 +623,11 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
         bool was_resized =
             maybe_resize_input(compute_graph, i, args[i]->toTensor());
         should_propagate_resize = should_propagate_resize || was_resized;
-        compute_graph->copy_into_staging(
+        compute_graph->maybe_cast_and_copy_into_staging(
            compute_graph->inputs()[i].staging,
            args[i]->toTensor().const_data_ptr(),
-           args[i]->toTensor().numel());
+           args[i]->toTensor().numel(),
+           equivalent_scalar_type(args[i]->toTensor().scalar_type()));
       } else if (compute_graph->val_is_symint(iref)) {
         VK_CHECK_COND(
             args[i]->isTensor(),
@@ -634,10 +659,11 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
        maybe_resize_output(compute_graph, i, args[o]->toTensor());
        // args holds inputs directly followed by outputs, so the i'th output
        // for compute_graph corresponds to the o'th arg
-       compute_graph->copy_from_staging(
+       compute_graph->maybe_cast_and_copy_from_staging(
           compute_graph->outputs()[i].staging,
           args[o]->toTensor().mutable_data_ptr(),
-          args[o]->toTensor().numel());
+          args[o]->toTensor().numel(),
+          equivalent_scalar_type(args[o]->toTensor().scalar_type()));
       }
       // TensorRef values represent constant tensors which will not have been
       // modified by the graph execution. Therefore, if a constant tensor is
```
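For context, the execute path above now narrows 64-bit tensor data into a 32-bit staging buffer on input and widens it back on output, all on the CPU side. Below is a minimal standalone sketch of that round trip; `narrow_copy` and `widen_copy` are hypothetical stand-ins for the staging helpers, not ExecuTorch APIs, and values outside the `int32_t` range would be truncated by the narrowing step.

```cpp
// Minimal sketch of the int64 <-> int32 round trip performed on the CPU side.
// narrow_copy/widen_copy are hypothetical stand-ins for the staging helpers.
#include <cassert>
#include <cstdint>
#include <vector>

// Narrow int64 ETensor data into an int32 "staging" buffer.
std::vector<int32_t> narrow_copy(const std::vector<int64_t>& src) {
  std::vector<int32_t> dst(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    // Values outside the int32 range are truncated here.
    dst[i] = static_cast<int32_t>(src[i]);
  }
  return dst;
}

// Widen int32 staging data back into an int64 output buffer.
std::vector<int64_t> widen_copy(const std::vector<int32_t>& src) {
  std::vector<int64_t> dst(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    dst[i] = static_cast<int64_t>(src[i]);
  }
  return dst;
}

int main() {
  const std::vector<int64_t> input = {1, -2, 3};
  // The round trip is lossless as long as every value fits in int32.
  assert(widen_copy(narrow_copy(input)) == input);
  return 0;
}
```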

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 60 additions & 0 deletions

```diff
@@ -863,6 +863,36 @@ void ComputeGraph::copy_into_staging(
   staging->copy_from(data, nbytes);
 }
 
+void ComputeGraph::maybe_cast_and_copy_into_staging(
+    const ValueRef idx,
+    const void* data,
+    const size_t numel,
+    const vkapi::ScalarType src_data_dtype) {
+  StagingPtr staging = get_staging(idx);
+  vkapi::ScalarType staging_dtype = staging->dtype();
+  if (src_data_dtype == staging_dtype) {
+    size_t nbytes = numel * vkapi::element_size(staging_dtype);
+    staging->copy_from(data, nbytes);
+    return;
+  } else {
+    // Hard-coded type conversion cases
+    if (src_data_dtype == vkapi::kLong && staging_dtype == vkapi::kInt) {
+      const int64_t* casted_data = reinterpret_cast<const int64_t*>(data);
+      staging->cast_and_copy_from<int64_t, int32_t>(casted_data, numel);
+    } else if (
+        src_data_dtype == vkapi::kDouble && staging_dtype == vkapi::kFloat) {
+      const double* casted_data = reinterpret_cast<const double*>(data);
+      staging->cast_and_copy_from<double, float>(casted_data, numel);
+    } else {
+      VK_THROW(
+          "Unsupported type conversion from ",
+          src_data_dtype,
+          " to staging dtype ",
+          staging_dtype);
+    }
+  }
+}
+
 void ComputeGraph::copy_from_staging(
     const ValueRef idx,
     void* data,
@@ -872,6 +902,36 @@ void ComputeGraph::copy_from_staging(
   staging->copy_to(data, nbytes);
 }
 
+void ComputeGraph::maybe_cast_and_copy_from_staging(
+    const ValueRef idx,
+    void* data,
+    const size_t numel,
+    const vkapi::ScalarType dst_data_dtype) {
+  StagingPtr staging = get_staging(idx);
+  vkapi::ScalarType staging_dtype = staging->dtype();
+  if (dst_data_dtype == staging_dtype) {
+    size_t nbytes = numel * vkapi::element_size(staging_dtype);
+    staging->copy_to(data, nbytes);
+    return;
+  } else {
+    // Hard-coded type conversion cases
+    if (dst_data_dtype == vkapi::kLong && staging_dtype == vkapi::kInt) {
+      int64_t* casted_data = reinterpret_cast<int64_t*>(data);
+      staging->cast_and_copy_to<int32_t, int64_t>(casted_data, numel);
+    } else if (
+        dst_data_dtype == vkapi::kDouble && staging_dtype == vkapi::kFloat) {
+      double* casted_data = reinterpret_cast<double*>(data);
+      staging->cast_and_copy_to<float, double>(casted_data, numel);
+    } else {
+      VK_THROW(
+          "Unsupported type conversion from staging dtype ",
+          staging_dtype,
+          " to ",
+          dst_data_dtype);
+    }
+  }
+}
+
 void ComputeGraph::prepare() {
 #define MERGE_FIELD(field) \
   static_cast<uint32_t>(std::ceil( \
```
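The new methods above lean on templated `cast_and_copy_from<SrcT, DstT>` / `cast_and_copy_to<SrcT, DstT>` staging helpers whose implementation is not part of this diff. A hedged sketch of what such a helper plausibly does, assuming it is a simple element-wise `static_cast` loop:

```cpp
// Hedged sketch of what StagingBuffer::cast_and_copy_from/to plausibly do:
// an element-wise static_cast loop. Not the actual ExecuTorch implementation.
#include <cstddef>
#include <vector>

template <typename SrcT, typename DstT>
void cast_and_copy(const SrcT* src, DstT* dst, size_t numel) {
  for (size_t i = 0; i < numel; ++i) {
    // Each element is converted individually, e.g. double -> float.
    dst[i] = static_cast<DstT>(src[i]);
  }
}

int main() {
  const std::vector<double> src = {1.5, 2.5};
  std::vector<float> dst(src.size());
  cast_and_copy(src.data(), dst.data(), src.size()); // double -> float
  return 0;
}
```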

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 13 additions & 0 deletions

```diff
@@ -956,8 +956,21 @@ class ComputeGraph final {
 
   void
   copy_into_staging(const ValueRef idx, const void* data, const size_t numel);
+
+  void maybe_cast_and_copy_into_staging(
+      const ValueRef idx,
+      const void* data,
+      const size_t numel,
+      const vkapi::ScalarType src_data_dtype);
+
   void copy_from_staging(const ValueRef idx, void* data, const size_t numel);
 
+  void maybe_cast_and_copy_from_staging(
+      const ValueRef idx,
+      void* data,
+      const size_t numel,
+      const vkapi::ScalarType dst_data_dtype);
+
  protected:
   // Command Buffer Management
 
```

backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml

Lines changed: 0 additions & 1 deletion

```diff
@@ -21,6 +21,5 @@ buffer_to_nchw:
         - parameter_values: [int8, int8]
         - parameter_values: [uint8, uint8]
         - parameter_values: [int32, int32]
-        - parameter_values: [int32, int64]
   shader_variants:
     - NAME: buffer_to_nchw
```

backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml

Lines changed: 0 additions & 1 deletion

```diff
@@ -22,7 +22,6 @@ image_to_nchw:
         - parameter_values: [int8, int8]
         - parameter_values: [uint8, uint8]
         - parameter_values: [int32, int32]
-        - parameter_values: [int32, int64]
   shader_variants:
     - NAME: image_to_nchw_texture3d
     - NAME: image_to_nchw_texture2d
```

backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml

Lines changed: 0 additions & 1 deletion

```diff
@@ -21,6 +21,5 @@ nchw_to_buffer:
         - parameter_values: [int8, int8]
        - parameter_values: [uint8, uint8]
        - parameter_values: [int32, int32]
-       - parameter_values: [int32, int64]
   shader_variants:
     - NAME: nchw_to_buffer
```

backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml

Lines changed: 0 additions & 1 deletion

```diff
@@ -21,7 +21,6 @@ nchw_to_image:
        - parameter_values: [int8, int8]
        - parameter_values: [uint8, uint8]
        - parameter_values: [int32, int32]
-       - parameter_values: [int32, int64]
   shader_variants:
     - NAME: nchw_to_image_texture3d
     - NAME: nchw_to_image_texture2d
```

backends/vulkan/serialization/vulkan_graph_builder.py

Lines changed: 16 additions & 1 deletion

```diff
@@ -240,6 +240,19 @@ def get_effective_dtype(self, dtype: torch.dtype) -> torch.dtype:
         else:
             return dtype
 
+    def get_staging_dtype(self, dtype: torch.dtype) -> torch.dtype:
+        # Since 64 bit types are not guaranteed to be supported on all GPUs,
+        # the conversion between 32 bit and 64 bit types is handled on the CPU
+        # side. The conversion will occur when copying the staging buffer
+        # contents to/from ETensor data pointers, rather than in the shader to
+        # copy between GPU buffer/image to staging buffer.
+        if self.downcast_64_bit and dtype == torch.float64:
+            return torch.float32
+        elif self.downcast_64_bit and dtype == torch.int64:
+            return torch.int32
+        else:
+            return dtype
+
     def create_tensor_value(self, spec: TensorSpec, constant_id: int = -1) -> int:
         # Negative id indicates that this tensor will have its own dedicated memory.
         mem_obj_id = -1
@@ -258,7 +271,9 @@ def create_tensor_value(self, spec: TensorSpec, constant_id: int = -1) -> int:
         # For constant tensors, the datatype of the original tensor will have been
         # converted to the effective dtype. Otherwise, the type of the staging buffer
         # for inputs/outputs should match the original tensor dtype.
-        staging_dtype = effective_dtype if constant_id >= 0 else spec.dtype
+        staging_dtype = (
+            effective_dtype if constant_id >= 0 else self.get_staging_dtype(spec.dtype)
+        )
 
         datatype = self.get_vk_datatype(effective_dtype)
         staging_datatype = self.get_vk_datatype(staging_dtype)
```
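The staging-dtype rule in `get_staging_dtype` above has a direct runtime counterpart: when `downcast_64_bit` is set, 64-bit dtypes map to their 32-bit equivalents and everything else passes through unchanged. A hedged C++ rendering of the same mapping for illustration, where the `Dtype` enum and `staging_dtype_for` are hypothetical and only mirror the Python logic:

```cpp
// C++ rendering of get_staging_dtype above, for illustration only.
// Dtype and staging_dtype_for are hypothetical; the real mapping lives in
// the Python graph builder.
#include <cassert>

enum class Dtype { Int32, Int64, Float32, Float64 };

Dtype staging_dtype_for(Dtype dtype, bool downcast_64_bit) {
  if (downcast_64_bit && dtype == Dtype::Float64) {
    return Dtype::Float32; // cast happens on the CPU at staging copy time
  }
  if (downcast_64_bit && dtype == Dtype::Int64) {
    return Dtype::Int32;
  }
  return dtype; // all other dtypes keep their original staging type
}

int main() {
  assert(staging_dtype_for(Dtype::Int64, true) == Dtype::Int32);
  assert(staging_dtype_for(Dtype::Int64, false) == Dtype::Int64);
  return 0;
}
```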
