fix error test

yangw-dev · yangw-dev · commit 25769dac67cb · 2025-06-18T11:28:59.000-07:00
Signed-off-by: Yang Wang <elainewy@meta.com>
diff --git a/.ci/scripts/benchmark_tooling/README.md b/.ci/scripts/benchmark_tooling/README.md
@@ -44,6 +44,7 @@ python3 .ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py \
 
 ##### Filtering Options:
 Notice, the filter needs full name matchings with correct format, to see all the options of the filter choices, please run the script with `--print-all-table-info`, and pay attention to section `Full list of table info from HUD API` with the field 'info', which contains normalized data we use to filter records from the original metadata 'groupInfo'.
+Each filter drops any record whose value does not exactly match one of that filter's keywords.
 
 - `--devices`: Filter by specific device names (e.g., "samsung-galaxy-s22-5g", "samsung-galaxy-s22plus-5g")
 - `--backends`: Filter by specific backend names (e.g.,  "qnn-q8" , ""llama3-spinquan)
diff --git a/.ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py b/.ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py
@@ -525,18 +525,7 @@ def _generate_table_name(
             for k in fields
             if k in group_info and group_info[k]
         )
-        if "(private)" in name:
-            name = name.replace("(private)", "")
-        return name
 
-    def _generate_matching_name(self, group_info: dict, fields: list[str]) -> str:
-        info = deepcopy(group_info)
-        name = "_".join(
-            self.normalize_string(info[k]) for k in fields if k in info and info[k]
-        )
-        if "(private)" in name:
-            name = name.replace("(private)", "")
-            # name = name +'(private)'
         return name
 
     def _process(
@@ -562,23 +551,28 @@ def _process(
         public = []
 
         for item in data:
-            # normalized string values groupInfo to info
-            item["info"] = {
-                k: self.normalize_string(v)
-                for k, v in item.get("groupInfo", {}).items()
-                if v is not None and isinstance(v, str)
-            }
+            org_group = item.get("groupInfo", {})
+            if "info" not in item:
+                item["info"] = {}
+            if org_group.get("device", "").find("private") != -1:
+                item["info"]["aws_type"] = "private"
+            else:
+                item["info"]["aws_type"] = "public"
+                public.append(item)
+
+            # Merge normalized groupInfo string values into item["info"]
+            item["info"].update(
+                {
+                    k: self.normalize_string(v)
+                    for k, v in item.get("groupInfo", {}).items()
+                    if v is not None and isinstance(v, str)
+                }
+            )
             group = item.get("info", {})
             # Add full name joined by the group key fields
             item["table_name"] = self._generate_table_name(
                 group, self.query_group_table_by_fields
             )
-            # Mark aws_type: private or public
-            if group.get("device", "").find("private") != -1:
-                item["info"]["aws_type"] = "private"
-            else:
-                item["info"]["aws_type"] = "public"
-                public.append(item)
         raw_data = deepcopy(data)
 
         # applies customized filters if any
@@ -646,6 +640,7 @@ def normalize_string(self, s: str) -> str:
         s = re.sub(r"-{2,}", "-", s)
         s = s.replace("-(", "(").replace("(-", "(")
         s = s.replace(")-", ")").replace("-)", ")")
+        s = s.replace("(private)", "")
         return s
 
     def filter_results(
@@ -678,7 +673,7 @@ def filter_results(
             info = item.get("info", {})
             if backends and info.get("backend") not in backends:
                 continue
-            if devices and not any(dev in info.get("device", "") for dev in devices):
+            if devices and info.get("device", "") not in devices:
                 continue
             if models and info.get("model", "") not in models:
                 continue
@@ -688,7 +683,7 @@ def filter_results(
         if after_len == 0:
             logging.info(
                 "it seems like there is no result matches the filter values"
-                ", please run script --no-silent again, and search for values in field"
+                ", please run script --list-all-table-info again, and search for values in field"
                 " 'info' for right format"
             )
         return results
@@ -742,15 +737,17 @@ def argparsers():
     parser.add_argument(
         "--backends",
         nargs="+",
-        help="Filter results by one or more backend full name(e.g. --backend qlora mv3) (OR logic)",
+        help="Filter results by one or more backend full name(e.g. --backend qlora mv3) (OR logic within backends scope, AND logic with other filter type)",
     )
     parser.add_argument(
         "--devices",
         nargs="+",
-        help="Filter results by one or more device names (e.g. --devices samsung-galaxy-s22-5g)(OR logic)",
+        help="Filter results by one or more device names (e.g. --devices samsung-galaxy-s22-5g)(OR logic within devices, AND logic with other filter type)",
     )
     parser.add_argument(
-        "--models", nargs="+", help="Filter by one or more models (OR logic)"
+        "--models",
+        nargs="+",
+        help="Filter by one or more models (OR logic within models scope, AND logic with other filter type)",
     )
     return parser.parse_args()
 
diff --git a/.ci/scripts/tests/test_get_benchmark_analysis_data.py b/.ci/scripts/tests/test_get_benchmark_analysis_data.py
@@ -206,7 +206,7 @@ def test_normalize_string(self):
             ("test_string", "test-string"),
             ("test string", "test-string"),
             ("test--string", "test-string"),
-            ("test (private)", "test(private)"),
+            ("test  (private)", "test"),
             ("test@#$%^&*", "test-"),
         ]
 
@@ -335,6 +335,97 @@ def test_filter_public_result(self):
         result = self.fetcher._filter_public_result(private_list, public_list)
         self.assertEqual(result, expected)
 
+    def test_filter_results(self):
+        """Test filter_results method with various filter combinations."""
+        # Create test data
+        test_data = [
+            {
+                "info": {
+                    "model": "llama3",
+                    "backend": "qlora",
+                    "device": "iphone-15-pro-max",
+                    "arch": "ios-17",
+                },
+                "rows": [{"metric_1": 1.0}],
+            },
+            {
+                "info": {
+                    "model": "llama3",
+                    "backend": "spinquant",
+                    "device": "iphone-15-pro-max",
+                    "arch": "ios-17",
+                },
+                "rows": [{"metric_1": 2.0}],
+            },
+            {
+                "info": {
+                    "model": "mv3",
+                    "backend": "xnnpack-q8",
+                    "device": "samsung-galaxy-s22-5g",
+                    "arch": "android-13",
+                },
+                "rows": [{"metric_1": 3.0}],
+            },
+            {
+                "info": {
+                    "model": "mv3",
+                    "backend": "qnn-q8",
+                    "device": "samsung-galaxy-s22-5g",
+                    "arch": "android-13",
+                },
+                "rows": [{"metric_1": 4.0}],
+            },
+        ]
+
+        # Test with no filters
+        empty_filters = self.module.BenchmarkFilters(
+            models=None, backends=None, devices=None
+        )
+        result = self.fetcher.filter_results(test_data, empty_filters)
+        self.assertEqual(result, test_data)
+
+        # Test with model filter
+        model_filters = self.module.BenchmarkFilters(
+            models=["llama3"], backends=None, devices=None
+        )
+        result = self.fetcher.filter_results(test_data, model_filters)
+        self.assertEqual(len(result), 2)
+        self.assertTrue(all(item["info"]["model"] == "llama3" for item in result))
+
+        # Test with backend filter
+        backend_filters = self.module.BenchmarkFilters(
+            models=None, backends=["qlora", "qnn-q8"], devices=None
+        )
+        result = self.fetcher.filter_results(test_data, backend_filters)
+        self.assertEqual(len(result), 2)
+        self.assertTrue(
+            all(item["info"]["backend"] in ["qlora", "qnn-q8"] for item in result)
+        )
+
+        # Test with device filter
+        device_filters = self.module.BenchmarkFilters(
+            models=None, backends=None, devices=["samsung-galaxy-s22-5g"]
+        )
+        result = self.fetcher.filter_results(test_data, device_filters)
+        self.assertEqual(len(result), 2)
+        self.assertTrue(
+            all("samsung-galaxy-s22-5g" in item["info"]["device"] for item in result)
+        )
+
+        # Test with combined filters (And logic fails)
+        combined_filters = self.module.BenchmarkFilters(
+            models=["llama3"], backends=["xnnpack-q8"], devices=None
+        )
+        result = self.fetcher.filter_results(test_data, combined_filters)
+        self.assertEqual(len(result), 0)
+
+        # Test with combined filters (And logic success)
+        combined_filters = self.module.BenchmarkFilters(
+            models=["llama3"], backends=None, devices=["iphone-15-pro-max"]
+        )
+        result = self.fetcher.filter_results(test_data, combined_filters)
+        self.assertEqual(len(result), 2)
+
     @patch(
         "get_benchmark_analysis_data.ExecutorchBenchmarkFetcher._fetch_execu_torch_data"
     )
@@ -442,7 +533,7 @@ def test_run_with_failure_report(self, mock_fetch):
                 "arch": "ios-17.4.3",
                 "aws_type": "private",
                 "backend": "qlora",
-                "device": "iphone-15-pro-max(private)",
+                "device": "iphone-15-pro-max",
                 "model": "llama3",
             },
             "rows": [
@@ -475,6 +566,76 @@ def test_run_no_data(self, mock_fetch):
         self.assertEqual(self.fetcher.matching_groups, {})
         mock_fetch.assert_called_once_with("2025-06-01T00:00:00", "2025-06-02T00:00:00")
 
+    @patch(
+        "get_benchmark_analysis_data.ExecutorchBenchmarkFetcher._fetch_execu_torch_data"
+    )
+    def test_run_with_filters(self, mock_fetch):
+        """Test run method with filters."""
+        # Setup mock data
+        mock_data = [
+            {
+                "groupInfo": {
+                    "model": "llama3",
+                    "backend": "qlora",
+                    "device": "Iphone 15 pro max (private)",
+                    "arch": "ios_17",
+                },
+                "rows": [{"metric_1": 1.0}],
+            },
+            {
+                "groupInfo": {
+                    "model": "mv3",
+                    "backend": "xnnpack_q8",
+                    "device": "s22_5g (private)",
+                    "arch": "android_13",
+                },
+                "rows": [{"metric_1": 2.0}],
+            },
+            {
+                "groupInfo": {
+                    "model": "mv3",
+                    "backend": "xnnpack_q8",
+                    "device": "s22_5g",
+                    "arch": "android_13",
+                },
+                "rows": [{"metric_1": 3.0}],
+            },
+        ]
+        mock_fetch.return_value = mock_data
+
+        # Create filters for llama3 model only
+        filters = self.module.BenchmarkFilters(
+            models=["llama3"], backends=None, devices=None
+        )
+        # Run the method with filters
+        self.fetcher.run("2025-06-01T00:00:00", "2025-06-02T00:00:00", filters)
+        result = self.fetcher.get_result()
+        print("result1", result)
+
+        # Verify results - should only have llama3 in private results
+        self.assertEqual(len(result["private"]), 1)
+        self.assertEqual(result["private"][0]["info"]["model"], "llama3")
+
+        # Public results should be empty since there's no matching table_name
+        self.assertEqual(result["public"], [])
+
+        # Test with backend filter
+        filters = self.module.BenchmarkFilters(
+            models=None, backends=["xnnpack-q8"], devices=None
+        )
+        self.fetcher.run("2025-06-01T00:00:00", "2025-06-02T00:00:00", filters)
+        result = self.fetcher.get_result()
+
+        print("result", result)
+
+        # Verify results - should only have xnnpack-q8 in private results
+        self.assertEqual(len(result["private"]), 1)
+        self.assertEqual(result["private"][0]["info"]["backend"], "xnnpack-q8")
+
+        # Public results should have the matching xnnpack-q8 entry
+        self.assertEqual(len(result["public"]), 1)
+        self.assertEqual(result["public"][0]["info"]["backend"], "xnnpack-q8")
+
     def test_to_dict(self):
         """Test to_dict method."""
         # Setup test data