|
28 | 28 |
|
29 | 29 | import triton |
30 | 30 | import triton.language as tl |
| 31 | +from utils.benchmark_utils import get_available_models, get_model_configs |
31 | 32 |
|
32 | 33 |
|
33 | 34 | class MetaData(): |
def model_benchmark_configs(args):
    """Build flash-attention benchmark shapes for the selected llama3 model(s).

    Loads model descriptions through ``get_model_configs`` (which resolves
    ``args.model_configs`` and filters by ``args.model``) and returns a list of
    ``(model_name, batch, HQ, HK, N_CTX_Q, N_CTX_K)`` tuples.

    CLI overrides take precedence over per-model defaults: ``-b`` for batch
    size (falls back to 1), ``-sq``/``-sk`` for query/key sequence lengths
    (fall back to the model's ``max_ctx_len``).
    """
    configs = get_model_configs(config_path=args.model_configs, model_families=["llama3"], model=args.model)

    # Batch size is identical for every model, so resolve it once up front.
    batch = args.b if args.b else 1

    shapes = []
    for name, cfg in configs.items():
        heads_q = cfg["num_attention_heads"]
        kv_heads = cfg["num_key_value_heads"]
        # A null num_key_value_heads means no GQA: K/V head count mirrors Q.
        heads_k = heads_q if kv_heads is None else kv_heads
        default_ctx = cfg["max_ctx_len"]
        seq_q = args.sq if args.sq else default_ctx
        seq_k = args.sk if args.sk else default_ctx
        shapes.append((name, batch, heads_q, heads_k, seq_q, seq_k))

    return shapes
1913 | 1888 |
|
@@ -2038,16 +2013,7 @@ def parse_args(): |
2038 | 2013 | ) |
2039 | 2014 | parser.add_argument('-model_configs', type=str, default="model_configs.json", help="Model config json file.") |
2040 | 2015 |
|
2041 | | - def get_available_models(config_file='model_configs.json'): |
2042 | | - import os |
2043 | | - import json |
2044 | | - """Load model names from the configuration file.""" |
2045 | | - config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), config_file) |
2046 | | - with open(config_path, 'r') as f: |
2047 | | - configs = json.load(f) |
2048 | | - return list(configs.keys()) |
2049 | | - |
2050 | | - available_models = get_available_models() # Dynamically load model names |
| 2016 | + available_models = get_available_models(model_families=["llama3"]) # Dynamically load model names |
2051 | 2017 | model_help = ("Model name to benchmark. Select from: [" + ", ".join(available_models) + |
2052 | 2018 | "]. Use 'all' to benchmark all models or leave blank for the default benchmark script.") |
2053 | 2019 | parser.add_argument('-model', type=str, default=None, help=model_help) |
|
0 commit comments