Skip to content

Commit 80bf70a

Browse files
MrGeva authored and dominicshanshan committed
[NVIDIA#8391][fix] check perf by device subtype (NVIDIA#8428)
Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
1 parent 3c44ca0 commit 80bf70a

File tree

7 files changed

+151
-17
lines changed

7 files changed

+151
-17
lines changed
Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,17 @@
1-
network_name,perf_case_name,test_name,threshold,absolute_threshold,metric_type,perf_metric
2-
"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.1,50,INFERENCE_TIME,99133.65406
3-
"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,SEQ_THROUGHPUT,82.63618
4-
"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.1,10,TOKEN_THROUGHPUT,10577.431520000002
5-
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.3,50,INFERENCE_TIME,214410.6447
6-
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.3,50,KV_CACHE_SIZE,68.84
7-
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.3,10,SEQ_THROUGHPUT,38.2071
8-
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.3,10,TOKEN_THROUGHPUT,4890.5035
1+
network_name,perf_case_name,test_name,threshold,absolute_threshold,metric_type,perf_metric,device_subtype
2+
"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,109007.96,
3+
"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,76.45,
4+
"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,9785.75,
5+
"llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-bench-pytorch-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,55.64,
6+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,171845.02,H100_PCIe
7+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,57.17,H100_PCIe
8+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,48.09,H100_PCIe
9+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_PCIe-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_PCIe-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,6155.59,H100_PCIe
10+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,139897.82,H100_NVL
11+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,69.59,H100_NVL
12+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,58.63,H100_NVL
13+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100_NVL-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100_NVL-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,7504.07,H100_NVL
14+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_inference_time[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,5000,INFERENCE_TIME,125068.76,H100
15+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_kv_cache_size[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",0.20,2,KV_CACHE_SIZE,57.09,H100
16+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_seq_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,5,SEQ_THROUGHPUT,65.50,H100
17+
"llama_v3.1_8b_instruct-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192","H100-PyTorch-Perf-1/perf/test_perf.py::test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]","test_perf_metric_token_throughput[llama_v3.1_8b_instruct-subtype:H100-bench-_autodeploy-float16-maxbs:512-maxnt:2048-input_output_len:128,128-reqs:8192]",-0.20,500,TOKEN_THROUGHPUT,8384.00,H100

tests/integration/defs/perf/data_export.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
# Uses tuple to do (src, dest) key names
7171
TEST_DESCRIPTION_KEYS = [
7272
"perf_case_name", "network_name", "framework", "sm_clk", "mem_clk",
73-
"gpu_idx", "network_hash", "flags"
73+
"gpu_idx", "network_hash", "flags", "device_subtype"
7474
]
7575

7676
# Tuples are used if the internal dictionary keys are different from output dictionary keys.

tests/integration/defs/perf/gpu_clock_lock.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@
2929
import psutil # type: ignore
3030
# Nvidia
3131
import pynvml # type: ignore
32-
from defs.trt_test_alternative import print_info, print_warning
32+
from defs.trt_test_alternative import print_error, print_info, print_warning
3333

34-
from .misc import clean_device_product_name
34+
from .misc import clean_device_product_name, get_device_subtype
3535

3636

3737
class InvalidGPUMonitoringResultError(RuntimeError):
@@ -124,6 +124,12 @@ def get_cpu_properties(self):
124124
def get_gpu_properties(self):
125125
return self._gpu_properties
126126

127+
def get_device_subtype(self):
    """Return the primary GPU's device subtype, or None when unknown.

    The subtype is populated into ``self._gpu_properties`` during property
    setup; if properties were never collected (or the key is absent) this
    returns None rather than raising.
    """
    properties = self._gpu_properties
    if not properties:
        return None
    return properties.get("device_subtype")
132+
127133
def get_gpu_id(self):
128134
return self._gpu_id
129135

@@ -500,6 +506,10 @@ def _setup_properties(self):
500506
"device_product_name"] = clean_device_product_name(
501507
self._gpu_properties["device_product_name"])
502508

509+
# Add device subtype based on cleaned product name
510+
self._gpu_properties["device_subtype"] = get_device_subtype(
511+
self._gpu_properties["device_product_name"])
512+
503513
if "jetson" in self._gpu_properties[
504514
"device_product_name"] or "p3710" in self._gpu_properties[
505515
"device_product_name"]:

tests/integration/defs/perf/misc.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,23 @@
2121
_GPU_DEVICE_PRODUCT_NAME_MAPPING = {"A100-PCIE-80GB": "A100 80GB PCIe"}
2222

2323

24+
def get_device_subtype(device_product_name: str) -> str:
    """Normalize a cleaned device product name into a subtype identifier.

    Both spaces and hyphens are mapped to underscores so names such as
    "A100 80GB PCIe" or "H100-NVL" produce stable, comparable keys.

    Args:
        device_product_name: Cleaned device product name from NVML

    Returns:
        Device subtype string with consistent underscore formatting
    """
    # One-pass character translation: space and hyphen both become "_".
    normalization = str.maketrans({" ": "_", "-": "_"})
    return device_product_name.translate(normalization)
39+
40+
2441
def clean_device_product_name(device_product_name):
2542
cleaned_name = device_product_name
2643
cleaned_name = cleaned_name.replace("NVIDIA", "").strip()

tests/integration/defs/perf/sanity_perf_check.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,72 @@ def _is_performance_regression(self, base_value: float, target_value: float,
139139
# Negative threshold: higher is better - regression if target < base
140140
return target_value < base_value
141141

142+
def _filter_by_device_subtype(
143+
self, base_perf: pd.DataFrame,
144+
current_perf: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
145+
"""
146+
Filter performance data to match device subtypes for autodeploy tests.
147+
148+
For autodeploy tests, only compare against baselines with the same device subtype.
149+
For non-autodeploy tests, use the original behavior.
150+
151+
Args:
152+
base_perf: Baseline performance DataFrame
153+
current_perf: Current performance DataFrame
154+
155+
Returns:
156+
Tuple of (filtered_base_perf, filtered_current_perf)
157+
"""
158+
# If current performance data doesn't have device_subtype column, return as-is
159+
if 'device_subtype' not in current_perf.columns:
160+
return base_perf, current_perf
161+
162+
# Get the current device subtype from current performance data
163+
current_device_subtypes = current_perf['device_subtype'].dropna(
164+
).unique()
165+
166+
if len(current_device_subtypes) == 0:
167+
# No device subtype info in current data, return as-is
168+
return base_perf, current_perf
169+
170+
current_device_subtype = current_device_subtypes[
171+
0] # Assume single device type per run
172+
print(
173+
f"Filtering performance data for device subtype: {current_device_subtype}"
174+
)
175+
176+
# Filter base performance data to only include entries with matching device subtype
177+
# or entries without device subtype info (for backward compatibility)
178+
if 'device_subtype' in base_perf.columns:
179+
# Filter base data: keep entries with matching subtype or null subtype
180+
base_filtered = base_perf[
181+
(base_perf['device_subtype'] == current_device_subtype) |
182+
(base_perf['device_subtype'].isna())].copy()
183+
else:
184+
# Base data doesn't have device subtype column, keep all entries
185+
base_filtered = base_perf.copy()
186+
187+
# For autodeploy tests, only keep current entries with device subtype
188+
autodeploy_mask = current_perf['network_name'].str.contains(
189+
'_autodeploy', na=False)
190+
current_filtered = current_perf.copy()
191+
192+
# For autodeploy tests, ensure device subtype is present
193+
if autodeploy_mask.any():
194+
autodeploy_entries = current_perf[autodeploy_mask]
195+
non_autodeploy_entries = current_perf[~autodeploy_mask]
196+
197+
# Keep only autodeploy entries that have device subtype
198+
autodeploy_with_subtype = autodeploy_entries[
199+
autodeploy_entries['device_subtype'].notna()]
200+
201+
# Combine filtered autodeploy entries with non-autodeploy entries
202+
current_filtered = pd.concat(
203+
[autodeploy_with_subtype, non_autodeploy_entries],
204+
ignore_index=True)
205+
206+
return base_filtered, current_filtered
207+
142208
def __call__(self, *args, **kwargs):
143209
# Check if the base_perf_csv file exists
144210
if not self.base_perf_csv.exists():
@@ -150,6 +216,10 @@ def __call__(self, *args, **kwargs):
150216
base_perf = load_file(self.base_perf_csv.as_posix())
151217
current_perf = load_file(self.target_perf_csv.as_posix())
152218

219+
# Filter performance data by device subtype for autodeploy tests
220+
base_perf, current_perf = self._filter_by_device_subtype(
221+
base_perf, current_perf)
222+
153223
full_diff, new_base = get_diff(base_perf, current_perf)
154224
if not full_diff.empty:
155225
self.report_diff(full_diff)

0 commit comments

Comments
 (0)