[ET-VK][ez] enabling fp64->fp32 conversion for Vulkan compatibility

ahmtox · web-flow · commit 749ced72e35e · 2025-07-14T07:58:00.000-07:00
Differential Revision: D77746137 Pull Request resolved: #12201
diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -83,10 +83,14 @@ vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
       return vkapi::kChar;
     case vkgraph::VkDataType::INT32:
       return vkapi::kInt;
+    case vkgraph::VkDataType::INT64:
+      return vkapi::kLong;
     case vkgraph::VkDataType::FLOAT16:
       return vkapi::kHalf;
     case vkgraph::VkDataType::FLOAT32:
       return vkapi::kFloat;
+    case vkgraph::VkDataType::FLOAT64:
+      return vkapi::kDouble;
   }
 }
 
diff --git a/backends/vulkan/serialization/schema.fbs b/backends/vulkan/serialization/schema.fbs
@@ -18,6 +18,8 @@ enum VkDataType : byte {
   INT32 = 3,
   FLOAT16 = 4,
   FLOAT32 = 5,
+  FLOAT64 = 6,
+  INT64 = 7,
 }
 
 // Describes what kind of GPU resource should be used to represent a tensor. The
diff --git a/backends/vulkan/serialization/vulkan_graph_builder.py b/backends/vulkan/serialization/vulkan_graph_builder.py
@@ -45,9 +45,11 @@ def __init__(
         self,
         program: ExportedProgram,
         delegate_mapping_builder: DelegateMappingBuilder,
+        downcast_64_bit: bool = True,
     ) -> None:
         self.program = program
         self.delegate_mapping_builder = delegate_mapping_builder
+        self.downcast_64_bit = downcast_64_bit
         self.chain = []
         self.values = []
         self.input_ids = []
@@ -72,13 +74,14 @@ def get_vk_datatype(torch_dtype: torch.dtype) -> vk_graph_schema.VkDataType:
             return vk_graph_schema.VkDataType.INT8
         elif torch_dtype == torch.int32:
             return vk_graph_schema.VkDataType.INT32
+        elif torch_dtype == torch.int64:
+            return vk_graph_schema.VkDataType.INT64
         elif torch_dtype == torch.float16:
             return vk_graph_schema.VkDataType.FLOAT16
         elif torch_dtype == torch.float32:
             return vk_graph_schema.VkDataType.FLOAT32
-        # Narrowing conversion for index tensor produced by max_poolNd_with_indices.
-        elif torch_dtype == torch.int64:
-            return vk_graph_schema.VkDataType.INT32
+        elif torch_dtype == torch.float64:
+            return vk_graph_schema.VkDataType.FLOAT64
         else:
             raise AssertionError(f"Invalid dtype for vulkan_preprocess ({torch_dtype})")
 
@@ -201,11 +204,20 @@ def create_tensor_value(self, spec: TensorSpec, constant_id: int = -1) -> int:
             # pyre-ignore[16]
             memory_layout = spec.vk_memory_layout
 
+        # Apply downcast logic before getting VK datatype
+        effective_dtype = spec.dtype
+        if self.downcast_64_bit and spec.dtype == torch.float64:
+            effective_dtype = torch.float32
+        elif self.downcast_64_bit and spec.dtype == torch.int64:
+            effective_dtype = torch.int32
+
+        datatype = self.get_vk_datatype(effective_dtype)
+
         new_id = len(self.values)
         self.values.append(
             vk_graph_schema.VkValue(
                 value=vk_graph_schema.VkTensor(
-                    datatype=self.get_vk_datatype(spec.dtype),
+                    datatype=datatype,
                     dims=spec.shape,
                     constant_id=constant_id,
                     mem_obj_id=mem_obj_id,
diff --git a/backends/vulkan/serialization/vulkan_graph_schema.py b/backends/vulkan/serialization/vulkan_graph_schema.py
@@ -29,6 +29,8 @@ class VkDataType(IntEnum):
     INT32 = 3
     FLOAT16 = 4
     FLOAT32 = 5
+    FLOAT64 = 6
+    INT64 = 7
 
 
 class VkStorageType(IntEnum):
diff --git a/backends/vulkan/vulkan_preprocess.py b/backends/vulkan/vulkan_preprocess.py
@@ -67,7 +67,6 @@
 # pyre-ignore
 def apply_passes(program: ExportedProgram, passes) -> ExportedProgram:
     for p in passes:
-
         if issubclass(type(p), ExportPass) or issubclass(type(p), PassBase):
             new_gm = program.graph_module
             # This is a workaround to allow the memory planning pass to work without
@@ -110,6 +109,9 @@ def parse_compile_spec(compile_specs: List[CompileSpec]) -> Dict[str, Any]:
         if spec.key == "skip_tag_memory_metadata":
             options[spec.key] = bool.from_bytes(spec.value, byteorder="little")
 
+        if spec.key == "downcast_64_bit":
+            options[spec.key] = bool.from_bytes(spec.value, byteorder="little")
+
         # Unhandled options are ignored
 
     return options
@@ -142,6 +144,7 @@ def preprocess(  # noqa: C901
         default_memory_layout = compile_options.get(
             "memory_layout_override", VkMemoryLayout.TENSOR_WIDTH_PACKED
         )
+        downcast_64_bit = compile_options.get("downcast_64_bit", True)
 
         program = unsafe_remove_auto_functionalized_pass(program)
 
@@ -213,7 +216,9 @@ def preprocess(  # noqa: C901
         )
 
         graph_builder = VkGraphBuilder(
-            program, DelegateMappingBuilder(generated_identifiers=True)
+            program,
+            DelegateMappingBuilder(generated_identifiers=True),
+            downcast_64_bit=downcast_64_bit,
         )
         vk_graph = graph_builder.build_graph()
 

Original file line number	Diff line number	Diff line change
`@@ -83,10 +83,14 @@ vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {`
`83`	`83`	`return vkapi::kChar;`
`84`	`84`	`case vkgraph::VkDataType::INT32:`
`85`	`85`	`return vkapi::kInt;`
	`86`	`+ case vkgraph::VkDataType::INT64:`
	`87`	`+ return vkapi::kLong;`
`86`	`88`	`case vkgraph::VkDataType::FLOAT16:`
`87`	`89`	`return vkapi::kHalf;`
`88`	`90`	`case vkgraph::VkDataType::FLOAT32:`
`89`	`91`	`return vkapi::kFloat;`
	`92`	`+ case vkgraph::VkDataType::FLOAT64:`
	`93`	`+ return vkapi::kDouble;`
`90`	`94`	`}`
`91`	`95`	`}`
`92`	`96`
Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,8 @@ enum VkDataType : byte {`
`18`	`18`	`INT32 = 3,`
`19`	`19`	`FLOAT16 = 4,`
`20`	`20`	`FLOAT32 = 5,`
	`21`	`+ FLOAT64 = 6,`
	`22`	`+ INT64 = 7,`
`21`	`23`	`}`
`22`	`24`
`23`	`25`	`// Describes what kind of GPU resource should be used to represent a tensor. The`