
Commit cb9236d

Merge branch 'main' into use-quantize_
2 parents 93e5e9f + 124758e commit cb9236d

File tree

7 files changed: +221 -23 lines changed


.ci/scripts/benchmark_tooling/README.md

Lines changed: 4 additions & 20 deletions
@@ -77,16 +77,16 @@ python3 .ci/scripts/benchmark_tooling/analyze_benchmark_stability.py \
 
 ##### Filtering Options:
 
-- `--device-pools`: Filter by private device pool names (e.g., "samsung-galaxy-s22-5g", "samsung-galaxy-s22plus-5g")
+- `--device-pools`: Filter by device pool names (e.g., "apple_iphone_15_private", "samsung_s22_private")
 - `--backends`: Filter by specific backend names (e.g.,"xnnpack_q8")
-- `--models`: Filter by specific model names (e.g., "mv3", "meta-llama-llama-3.2-1b-instruct-qlora-int4-eo8")
+- `--models`: Filter by specific model names (e.g., "mv3", "meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8")
 
 #### Example Usage
 
 Filter by multiple private device pools and models:
 ```bash
 # This fetches all private table data for models 'llama-3.2-1B' and 'mv3'
-python3 get_benchmark_analysis_data.py \
+python3 .ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py \
   --startTime "2025-06-01T00:00:00" \
   --endTime "2025-06-11T00:00:00" \
   --device-pools 'apple_iphone_15_private' 'samsung_s22_private' \
@@ -97,7 +97,7 @@ Filter by specific device pool and models:
 ```bash
 # This fetches all private iPhone table data for models 'llama-3.2-1B' and 'mv3',
 # and associated public iPhone data
-python3 get_benchmark_analysis_data.py \
+python3 .ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py \
   --startTime "2025-06-01T00:00:00" \
   --endTime "2025-06-11T00:00:00" \
   --device-pools 'apple_iphone_15_private' \
@@ -140,22 +140,6 @@ fetcher.run(
     end_time="2025-06-17T18:00:00"
 )
 
-# Get results in different formats
-# As DataFrames
-df_results = fetcher.to_df()
-
-# Export to Excel
-fetcher.to_excel(output_dir="./results")
-
-# Export to CSV
-fetcher.to_csv(output_dir="./results")
-
-# Export to JSON
-json_path = fetcher.to_json(output_dir="./results")
-
-# Get raw dictionary results
-dict_results = fetcher.to_dict()
-
 # Use the output_data method for flexible output
 results = fetcher.output_data(output_type="excel", output_dir="./results")
 ```

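The hunk above drops the per-format helpers (`to_df`, `to_excel`, `to_csv`, `to_json`, `to_dict`) from the README in favor of the single `output_data` call. A rough sketch of how the remaining snippet might be driven end to end; the fetcher's class name, import path, and constructor are not shown in this diff, so the ones below are hypothetical placeholders, and `"excel"` is the only `output_type` value the diff confirms:

```python
# Hypothetical names: the real fetcher class and import are not part of this diff.
from get_benchmark_analysis_data import BenchmarkFetcher  # placeholder import

fetcher = BenchmarkFetcher()  # placeholder constructor

# Fetch data for a time window, mirroring the README snippet above.
fetcher.run(
    start_time="2025-06-01T00:00:00",  # assumed parameter name, mirrors end_time
    end_time="2025-06-17T18:00:00",
)

# Single flexible output entry point that replaces the removed helpers.
results = fetcher.output_data(output_type="excel", output_dir="./results")
print(results)
```
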
backends/xnnpack/partition/config/generic_node_configs.py

Lines changed: 11 additions & 0 deletions
@@ -107,6 +107,17 @@ def __init__(self, **kwargs):
     def supported_precision_types(self) -> List[ConfigPrecisionType]:
         return [ConfigPrecisionType.FP32, ConfigPrecisionType.STATIC_QUANT]
 
+    def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
+        if not self.check_common_constraints(node, ep):
+            return False
+        # No support for add nodes with alpha != 1
+        if "alpha" in node.kwargs and not np.isclose(
+            node.kwargs["alpha"], 1.0, atol=1e-9, rtol=1e-9
+        ):
+            why(node, reason="Add node doesn't support alpha != 1")
+            return False
+        return True
+
 
 class ReLUConfig(GenericNodePartitionerConfig):
     target_name = "relu.default"

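For context on the new constraint: `torch.add(x, y, alpha=a)` computes `x + a * y`, so only `alpha == 1` corresponds to the plain element-wise add that this config lowers; any other alpha would require an extra scaling of the second operand, which the check above now rejects. A minimal sketch of the semantics in plain PyTorch (independent of the partitioner):

```python
import torch

x = torch.tensor([1.0, 2.0])
y = torch.tensor([10.0, 20.0])

# torch.add(x, y, alpha=a) computes x + a * y.
plain = torch.add(x, y, alpha=1)   # same as x + y -> eligible for partitioning
scaled = torch.add(x, y, alpha=2)  # same as x + 2 * y -> rejected by the new check

assert torch.equal(plain, x + y)
assert torch.equal(scaled, x + 2 * y)
```
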
backends/xnnpack/test/ops/test_add.py

Lines changed: 24 additions & 0 deletions
@@ -240,3 +240,27 @@ def forward(self, x, z):
             .serialize()
             .run_method_and_compare_outputs()
         )
+
+    class AddWithAlpha(torch.nn.Module):
+        def forward(self, x, y):
+            # node with alpha = 1.0 will be partitioned
+            out1 = torch.add(x, y, alpha=1)
+            # node with alpha != 1.0 will not be partitioned
+            out2 = torch.add(x, y, alpha=2)
+            return out1, out2
+
+    def test_add_with_alpha(self):
+        inputs = (torch.randn(1, 1, 4, 4), torch.randn(1, 1, 4, 4))
+        (
+            Tester(self.AddWithAlpha(), inputs)
+            .export()
+            .check_count({"torch.ops.aten.add.Tensor": 2})
+            .to_edge_transform_and_lower()
+            # unpartitioned node
+            .check_count({"executorch_exir_dialects_edge__ops_aten_add_Tensor": 1})
+            # partitioned node
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs()
+        )

examples/models/llama/config/llm_config.py

Lines changed: 2 additions & 2 deletions
@@ -227,9 +227,9 @@ class ExportConfig:
     export_only: bool = False
 
     def __post_init__(self):
-        if self.max_context_length > self.max_seq_length:
+        if self.max_context_length < self.max_seq_length:
             raise ValueError(
-                f"max_context_length of {self.max_context_length} cannot be greater than max_seq_length of {self.max_seq_length}"
+                f"max_context_length of {self.max_context_length} cannot be shorter than max_seq_length of {self.max_seq_length}"
             )
 
 
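The fix flips the inequality: before this change the guard rejected the valid case (`max_context_length > max_seq_length`) and let the invalid one through. A minimal, self-contained sketch of the corrected behavior, using a stripped-down stand-in for `ExportConfig` (the real class has more fields):

```python
from dataclasses import dataclass


@dataclass
class ExportConfigSketch:
    """Hypothetical stand-in for ExportConfig with only the fields the check needs."""

    max_seq_length: int = 128
    max_context_length: int = 128

    def __post_init__(self):
        # Corrected check: the context length may not be shorter than the
        # maximum sequence length.
        if self.max_context_length < self.max_seq_length:
            raise ValueError(
                f"max_context_length of {self.max_context_length} cannot be "
                f"shorter than max_seq_length of {self.max_seq_length}"
            )


ExportConfigSketch(max_seq_length=128, max_context_length=256)  # accepted
try:
    ExportConfigSketch(max_seq_length=256, max_context_length=128)
except ValueError as e:
    print(e)  # rejected, as intended after this change
```
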
runtime/backend/interface.cpp

Lines changed: 37 additions & 0 deletions
@@ -66,5 +66,42 @@ Result<const char*> get_backend_name(size_t index) {
   return registered_backends[index].name;
 }
 
+Error set_option(
+    const char* backend_name,
+    const executorch::runtime::Span<executorch::runtime::BackendOption>
+        backend_options) {
+  auto backend_class = get_backend_class(backend_name);
+  if (!backend_class) {
+    return Error::NotFound;
+  }
+
+  BackendOptionContext backend_option_context;
+  Error result =
+      backend_class->set_option(backend_option_context, backend_options);
+  if (result != Error::Ok) {
+    return result;
+  }
+  return Error::Ok;
+}
+
+Error get_option(
+    const char* backend_name,
+    executorch::runtime::Span<executorch::runtime::BackendOption>
+        backend_options) {
+  auto backend_class = get_backend_class(backend_name);
+  if (!backend_class) {
+    return Error::NotFound;
+  }
+  BackendOptionContext backend_option_context;
+  executorch::runtime::Span<BackendOption> backend_options_ref(
+      backend_options.data(), backend_options.size());
+  auto result =
+      backend_class->get_option(backend_option_context, backend_options_ref);
+  if (result != Error::Ok) {
+    return result;
+  }
+  return Error::Ok;
+}
+
 } // namespace ET_RUNTIME_NAMESPACE
 } // namespace executorch

runtime/backend/interface.h

Lines changed: 28 additions & 0 deletions
@@ -183,6 +183,34 @@ size_t get_num_registered_backends();
  */
 Result<const char*> get_backend_name(size_t index);
 
+/**
+ * Sets backend options for a specific backend.
+ *
+ * @param backend_name The name of the backend to set options for
+ * @param backend_options The backend option list containing the options
+ * to set
+ * @return Error::Ok on success, Error::NotFound if backend is not found, or
+ * other error codes on failure
+ */
+Error set_option(
+    const char* backend_name,
+    const executorch::runtime::Span<executorch::runtime::BackendOption>
+        backend_options);
+
+/**
+ * Retrieves backend options for a specific backend.
+ *
+ * @param backend_name The name of the backend to get options from
+ * @param backend_options The backend option objects that will be filled with
+ * the populated values from the backend
+ * @return Error::Ok on success, Error::NotFound if backend is not found, or
+ * other error codes on failure
+ */
+Error get_option(
+    const char* backend_name,
+    executorch::runtime::Span<executorch::runtime::BackendOption>
+        backend_options);
+
 } // namespace ET_RUNTIME_NAMESPACE
 } // namespace executorch

runtime/backend/test/backend_interface_update_test.cpp

Lines changed: 115 additions & 1 deletion
@@ -7,9 +7,11 @@
  */
 
 #include <executorch/runtime/backend/interface.h>
+#include <executorch/runtime/backend/options.h>
 #include <executorch/runtime/platform/runtime.h>
 
 #include <gtest/gtest.h>
+#include <memory>
 
 using namespace ::testing;
 using executorch::runtime::ArrayRef;
@@ -61,7 +63,8 @@ class MockBackend : public BackendInterface {
     int success_update = 0;
     for (const auto& backend_option : backend_options) {
       if (strcmp(backend_option.key, "Backend") == 0) {
-        if (std::holds_alternative<std::array<char, 256>>(
+        if (std::holds_alternative<
+                std::array<char, executorch::runtime::kMaxOptionValueLength>>(
                 backend_option.value)) {
           // Store the value in our member variable
           const auto& arr =
@@ -285,3 +288,114 @@ TEST_F(BackendInterfaceUpdateTest, UpdateBetweenExecutes) {
   ASSERT_TRUE(mock_backend->target_backend.has_value());
   EXPECT_STREQ(mock_backend->target_backend.value().c_str(), "NPU");
 }
+
+// Stub backend for testing set_option/get_option
+class StubBackend : public BackendInterface {
+ public:
+  ~StubBackend() override = default;
+
+  bool is_available() const override {
+    return true;
+  }
+
+  Result<DelegateHandle*> init(
+      BackendInitContext& context,
+      FreeableBuffer* processed,
+      ArrayRef<CompileSpec> compile_specs) const override {
+    return nullptr;
+  }
+
+  Error execute(
+      BackendExecutionContext& context,
+      DelegateHandle* handle,
+      EValue** args) const override {
+    return Error::Ok;
+  }
+
+  Error get_option(
+      BackendOptionContext& context,
+      executorch::runtime::Span<executorch::runtime::BackendOption>&
+          backend_options) override {
+    // For testing purposes, just record that get_option was called
+    // and verify the input parameters
+    get_option_called = true;
+    get_option_call_count++;
+    last_get_option_size = backend_options.size();
+
+    // Verify that the expected option key is present and modify the value
+    for (size_t i = 0; i < backend_options.size(); ++i) {
+      if (strcmp(backend_options[i].key, "NumberOfThreads") == 0) {
+        // Set the value to what was stored by set_option
+        backend_options[i].value = last_num_threads;
+        found_expected_key = true;
+        break;
+      }
+    }
+
+    return Error::Ok;
+  }
+
+  Error set_option(
+      BackendOptionContext& context,
+      const executorch::runtime::Span<executorch::runtime::BackendOption>&
+          backend_options) override {
+    // Store the options for verification
+    last_options_size = backend_options.size();
+    if (backend_options.size() > 0) {
+      for (const auto& option : backend_options) {
+        if (strcmp(option.key, "NumberOfThreads") == 0) {
+          if (auto* val = std::get_if<int>(&option.value)) {
+            last_num_threads = *val;
+          }
+        }
+      }
+    }
+    return Error::Ok;
+  }
+
+  // Mutable for testing verification
+  size_t last_options_size = 0;
+  int last_num_threads = 0;
+  bool get_option_called = false;
+  int get_option_call_count = 0;
+  size_t last_get_option_size = 0;
+  bool found_expected_key = false;
+};
+
+class BackendUpdateTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Since these tests cause ET_LOG to be called, the PAL must be initialized
+    // first.
+    executorch::runtime::runtime_init();
+
+    // Register the stub backend
+    stub_backend = std::make_unique<StubBackend>();
+    Backend backend_config{"StubBackend", stub_backend.get()};
+    auto register_result = register_backend(backend_config);
+    ASSERT_EQ(register_result, Error::Ok);
+  }
+
+  std::unique_ptr<StubBackend> stub_backend;
+};
+
+// Test basic set_option/get_option round trip through the registry
+TEST_F(BackendUpdateTest, TestSetGetOption) {
+  BackendOptions<1> backend_options;
+  int new_num_threads = 4;
+  backend_options.set_option("NumberOfThreads", new_num_threads);
+
+  auto status = set_option("StubBackend", backend_options.view());
+  ASSERT_EQ(status, Error::Ok);
+
+  // Set up the default option, which will be populated by the get_option call
+  BackendOption ref_backend_option{"NumberOfThreads", 0};
+  status = get_option("StubBackend", ref_backend_option);
+
+  // Verify that the backend actually received the options
+  ASSERT_TRUE(std::get<int>(ref_backend_option.value) == new_num_threads);
+
+  // Verify that the backend actually updated the options
+  ASSERT_EQ(stub_backend->last_options_size, 1);
+  ASSERT_EQ(stub_backend->last_num_threads, new_num_threads);
+}

0 commit comments
