@@ -105,49 +105,41 @@ def load_calibration_dataset(
105105
def infer_model(
    exec_prog: EdgeProgramManager,
    inputs,
    num_iter: int,
    warmup_iter: int,
    output_path: str,
) -> float:
    """
    Executes inference and reports the average timing.

    :param exec_prog: EdgeProgramManager of the lowered model.
    :param inputs: The inputs tuple passed to the model's "forward" method.
    :param num_iter: The number of iterations to execute inference for timing.
    :param warmup_iter: The number of iterations to execute inference for warmup
        before timing.
    :param output_path: Path to the output tensor file to save the output of
        inference. If falsy, the output is not saved.
    :return: The average inference timing in seconds over ``num_iter`` runs.
    :raises ValueError: If ``num_iter`` is less than 1.
    """
    if num_iter < 1:
        # Fail early with a clear message instead of a ZeroDivisionError at
        # the final averaging step (and a possible UnboundLocalError on `out`).
        msg = "num_iter must be >= 1."
        raise ValueError(msg)

    # Load model from buffer
    executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer)

    # Execute warmup; results are intentionally discarded.
    for _ in range(warmup_iter):
        out = executorch_module.run_method("forward", inputs)

    # Execute inference and measure timing.
    # time.perf_counter is the documented monotonic, high-resolution clock for
    # measuring short durations; time.time is wall-clock and may step backwards.
    time_total = 0.0
    for _ in range(num_iter):
        time_start = time.perf_counter()
        out = executorch_module.run_method("forward", inputs)
        time_end = time.perf_counter()
        time_total += time_end - time_start

    # Save the last output tensor as a raw tensor file
    if output_path:
        torch.save(out, output_path)

    # Return average inference timing
    return time_total / float(num_iter)
152144
153145
@@ -161,10 +153,10 @@ def validate_model(
161153 :param calibration_dataset: A DataLoader containing calibration data.
162154 :return: The accuracy score of the model.
163155 """
164- # 1: Load model from buffer
156+ # Load model from buffer
165157 executorch_module = _load_for_executorch_from_buffer (exec_prog .buffer )
166158
167- # 2: Iterate over the dataset and run the executor
159+ # Iterate over the dataset and run the executor
168160 predictions = []
169161 targets = []
170162 for _idx , data in enumerate (calibration_dataset ):
@@ -173,7 +165,7 @@ def validate_model(
173165 out = executorch_module .run_method ("forward" , (feature ,))
174166 predictions .extend (torch .stack (out ).reshape (- 1 , 1000 ).argmax (- 1 ))
175167
176- # 1: Check accuracy
168+ # Check accuracy
177169 return accuracy_score (predictions , targets )
178170
179171
@@ -232,7 +224,16 @@ def main( # noqa: C901
232224 msg = "Input shape must be a list or tuple."
233225 raise ValueError (msg )
234226 # Provide input
235- example_args = (torch .randn (* input_shape ),)
227+ if input_path :
228+ example_args = (torch .load (input_path , weights_only = False ),)
229+ elif suite == "huggingface" :
230+ if hasattr (model , "config" ) and hasattr (model .config , "vocab_size" ):
231+ vocab_size = model .config .vocab_size
232+ else :
233+ vocab_size = 30522
234+ example_args = (torch .randint (0 , vocab_size , input_shape , dtype = torch .int64 ),)
235+ else :
236+ example_args = (torch .randn (* input_shape ),)
236237
237238 # Export the model to the aten dialect
238239 aten_dialect : ExportedProgram = export (model , example_args )
@@ -301,7 +302,7 @@ def transform_fn(x):
301302 if infer :
302303 print ("Start inference of the model:" )
303304 avg_time = infer_model (
304- exec_prog , input_shape , num_iter , warmup_iter , input_path , output_path
305+ exec_prog , example_args , num_iter , warmup_iter , output_path
305306 )
306307 print (f"Average inference time: { avg_time } " )
307308
0 commit comments