final

yangw-dev · yangw-dev · commit 908ee267ecb9 · 2025-06-18T07:37:41.000-07:00
Signed-off-by: Yang Wang <elainewy@meta.com>
diff --git a/.ci/scripts/benchmark_tooling/README.md b/.ci/scripts/benchmark_tooling/README.md
@@ -19,7 +19,7 @@ python3 .ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py --startTime
 ```
 
 Additional options:
-- `--silent`: Hide processing logs, show only results
+- `--no-silent`: Show processing logs; by default only results and minimal logging are shown
 - `--outputType df`: Display results in DataFrame format
 - `--outputType excel --outputDir "{YOUR_LOCAL_DIRECTORY}"`: Generate Excel file with multiple sheets (`res_private.xlsx` and `res_public.xlsx`)
 - `--outputType csv --outputDir "{YOUR_LOCAL_DIRECTORY}"`: Generate CSV files in folders (`private` and `public`)
diff --git a/.ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py b/.ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py
@@ -7,6 +7,7 @@
 and customizing data retrieval parameters.
 """
 
+from yaspin import yaspin
 import argparse
 import json
 import logging
@@ -211,10 +212,12 @@ def _filter_public_result(self, private_list, public_list):
             set([item["table_name"] for item in private_list])
             & set([item["table_name"] for item in public_list])
         )
-        logging.info(
-            f"Found {len(common)} table names existed in both private and public, use it to filter public tables:"
-        )
-        logging.info(json.dumps(common, indent=1))
+
+        if not self.disable_logging:
+            logging.info(
+                f"Found {len(common)} table names existed in both private and public, use it to filter public tables:"
+            )
+            logging.info(json.dumps(common, indent=1))
         filtered_public = [item for item in public_list if item["table_name"] in common]
         return filtered_public
 
@@ -450,30 +453,21 @@ def print_all_groups_info(self) -> None:
         Separates results by category and displays counts.
         This is useful for debugging and understanding what data is available.
         """
-
         if not self.data or not self.matching_groups:
             logging.info("No data found, please call get_data() first")
             return
-
-        logging.info("peeking table result:")
-        logging.info(json.dumps(self.data[0], indent=2))
-
-        for item in self.matching_groups.values():
-            logging.info(f" all {item.category} benchmark results")
-            names = []
-            for row in item.data:
-                names.append(
-                    {
-                        "table_name": row["table_name"],
-                        "info": row["info"],
-                        "counts": len(row["rows"]),
-                    }
-                )
-            logging.info(
-                f"\n============ {item.category} benchmark results({len(names)})=================\n"
+        logging.info(f" all clean benchmark table info from HUD")
+        names = []
+        for item in self.data:
+            names.append(
+                {
+                    "table_name": item.get("table_name", ""),
+                    "groupInfo": item.get("groupInfo", ""),
+                    "counts": len(item.get("rows", [])),
+                }
             )
-            for name in names:
-                logging.info(json.dumps(name, indent=2))
+        for name in names:
+            logging.info(json.dumps(name, indent=2))
 
     def _generate_table_name(self, group_info: dict, fields: list[str]) -> str:
         name = "_".join(
@@ -568,13 +562,16 @@ def _fetch_execu_torch_data(self, start_time, end_time):
             group_row_by_fields=self.query_group_row_by_fields,
         )
         params = {k: v for k, v in params_object.__dict__.items() if v is not None}
-        response = requests.get(url, params=params)
-        if response.status_code == 200:
-            return response.json()
-        else:
-            logging.info(f"Failed to fetch benchmark data ({response.status_code})")
-            logging.info(response.text)
-            return None
+        with yaspin(text="Waiting for response", color="cyan") as spinner:
+            response = requests.get(url, params=params)
+            if response.status_code == 200:
+                spinner.ok("V")
+                return response.json()
+            else:
+                logging.info(f"Failed to fetch benchmark data ({response.status_code})")
+                logging.info(response.text)
+                spinner.fail("x")
+                return None
 
     def normalize_string(self, s: str) -> str:
         s = s.lower().strip()
@@ -606,16 +603,26 @@ def argparsers():
     parser.add_argument(
         "--env", choices=["local", "prod"], default="prod", help="Environment"
     )
-    parser.add_argument("--silent", action="store_true", help="Disable logging")
 
+    parser.add_argument(
+        "--no-silent",
+        action="store_false",
+        dest="silent",
+        default=True,
+        help="Allow output (disable silent mode)",
+    )
     # Options for generate_data
     parser.add_argument(
         "--outputType",
         choices=["json", "df", "csv", "print", "excel"],
         default="print",
         help="Output format (only for generate_data)",
     )
-    parser.add_argument("--outputDir", default=".", help="Output directory")
+
+    parser.add_argument(
+        "--outputDir", default=".", help="Output directory, default is ."
+    )
+
     return parser.parse_args()
 
 
@@ -626,4 +633,6 @@ def argparsers():
         args.startTime,
         args.endTime,
     )
+    if not args.silent:
+        fetcher.print_all_groups_info()
     fetcher.output_data(args.outputType, args.outputDir)
diff --git a/.ci/scripts/benchmark_tooling/requirements.txt b/.ci/scripts/benchmark_tooling/requirements.txt
@@ -1,7 +1,7 @@
 requests>=2.32.3
 xlsxwriter>=3.2.3
 pandas>=2.3.0
-openpyxl
 tabulate
 matplotlib
 openpyxl
+yaspin