Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/inference_endpoint/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ def _add_shared_benchmark_args(parser):
parser.add_argument(
"--report-dir", type=Path, help="Path to save detailed benchmark report"
)
parser.add_argument(
"--ensure-submission-checker-compatibility",
action="store_true",
help="Enable loadgen compatibility mode for submission checker",
)


def _add_online_specific_args(parser):
Expand Down
28 changes: 27 additions & 1 deletion src/inference_endpoint/commands/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@
from transformers import AutoTokenizer
from transformers.utils import logging as transformers_logging

from inference_endpoint.commands.utils import get_default_report_path
from inference_endpoint.commands.utils import (
generate_mlperf_log_details_submission_checker,
generate_user_conf_submission_checker,
get_default_report_path,
)
from inference_endpoint.config.runtime_settings import RuntimeSettings
from inference_endpoint.config.schema import (
APIType,
Expand Down Expand Up @@ -291,6 +295,9 @@ def _build_config_from_cli(
timeout = getattr(args, "timeout", None)
verbose_level = getattr(args, "verbose", 0)
api_type = APIType(getattr(args, "api_type", "openai"))
ensure_submission_checker_compatibility = getattr(
args, "ensure_submission_checker_compatibility", False
)
# Build BenchmarkConfig from CLI params
return BenchmarkConfig(
name=f"cli_{benchmark_mode}",
Expand Down Expand Up @@ -349,6 +356,7 @@ def _build_config_from_cli(
report_dir=report_dir,
timeout=timeout,
verbose=verbose_level > 0,
ensure_submission_checker_compatibility=ensure_submission_checker_compatibility,
)


Expand Down Expand Up @@ -712,6 +720,24 @@ def signal_handler(signum, frame):
except Exception as e:
logger.error(f"Save failed: {e}")

if config.ensure_submission_checker_compatibility:
try:
# convert the runtime_settings.json to user.conf format
generate_user_conf_submission_checker(report_dir)
except Exception as e:
logger.error(
f"Failed to generate user conf for submission checker: {e}"
)
raise
try:
# generate mlperf_log_details.txt from summary.json
generate_mlperf_log_details_submission_checker(report_dir, strict=True)
except Exception as e:
logger.error(
f"Failed to generate mlperf_log_details.txt for submission checker: {e}"
)
raise

except KeyboardInterrupt:
logger.warning("Benchmark interrupted by user")
raise
Expand Down
94 changes: 94 additions & 0 deletions src/inference_endpoint/commands/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"""Utility commands: info, validate, init."""

import argparse
import json
import logging
import os
import platform
Expand All @@ -31,6 +32,7 @@
from pydantic import ValidationError as PydanticValidationError

from .. import __version__
from ..config.constants import ENDPOINTS_TO_LOADGEN_KEY_MAPPING
from ..config.schema import TEMPLATE_TYPE_MAP, BenchmarkConfig
from ..config.yaml_loader import ConfigError, ConfigLoader
from ..exceptions import InputValidationError, SetupError
Expand Down Expand Up @@ -313,3 +315,95 @@ def get_default_report_path() -> Path:
return Path(
f"{tempfile.gettempdir()}/reports_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
)


def generate_user_conf_submission_checker(report_dir: Path) -> None:
    """Write a user.conf for the submission checker from runtime_settings.json.

    Each runtime-settings entry is emitted as one ``*.*.<loadgen_key>=<value>``
    line, with the endpoints key translated through
    config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING; keys without a mapping
    pass through unchanged.

    Args:
        report_dir: Report directory that contains runtime_settings.json.

    Raises:
        FileNotFoundError: If runtime_settings.json does not exist in report_dir.
    """

    runtime_settings_path = report_dir / "runtime_settings.json"
    user_conf_path = report_dir / "user.conf"

    if not runtime_settings_path.exists():
        logger.error(f"runtime_settings.json not found in {report_dir}")
        raise FileNotFoundError(f"runtime_settings.json not found in {report_dir}")
    try:
        with runtime_settings_path.open() as settings_file:
            runtime_settings = json.load(settings_file)

        # Translate every key (identity for unmapped ones) and emit one
        # loadgen-style config line per setting.
        conf_lines = [
            f"*.*.{ENDPOINTS_TO_LOADGEN_KEY_MAPPING.get(key, key)}={value}\n"
            for key, value in runtime_settings.items()
        ]
        with user_conf_path.open("w") as conf_file:
            conf_file.writelines(conf_lines)

        logger.info(f"Generated user.conf at {user_conf_path}")

    except Exception as e:
        logger.error(f"Failed to generate user.conf: {e}")
        raise


def generate_mlperf_log_details_submission_checker(
    report_dir: Path, strict: bool = True
) -> None:
    """Generate mlperf_log_details.txt file for submission checker from summary.json.

    Reads summary.json line by line, keeps only the records prefixed with the
    ``:::ENDPTS`` marker, renames each record's ``key`` via
    config.constants.ENDPOINTS_TO_LOADGEN_KEY_MAPPING (unmapped keys are kept
    as-is), and writes the result in loadgen detail-log format.

    Args:
        report_dir: Path to the report directory containing summary.json.
        strict: If True (default), a marker line whose JSON payload cannot be
            parsed raises; if False, the line is logged and skipped.

    Raises:
        FileNotFoundError: If summary.json does not exist in report_dir.
        json.JSONDecodeError: If strict is True and a marker line carries an
            invalid JSON payload.
    """

    summary_path = report_dir / "summary.json"
    log_details_path = report_dir / "mlperf_log_details.txt"
    marker = ":::ENDPTS"

    if not summary_path.exists():
        logger.error(f"summary.json not found in {report_dir}")
        raise FileNotFoundError(f"summary.json not found in {report_dir}")
    try:
        with (
            open(summary_path) as summary_file,
            open(log_details_path, "w") as output_file,
        ):
            for line in summary_file:
                line = line.strip()
                # Only marker-prefixed lines are loadgen detail records;
                # anything else in summary.json is ignored with a warning.
                if line.startswith(marker):
                    try:
                        record = json.loads(line[len(marker) :])
                    except json.JSONDecodeError as e:
                        if strict:
                            logger.error(f"Encountered invalid line: {line} Error: {e}")
                            raise
                        logger.warning(f"Skipping invalid line: {line}")
                        continue
                    # Rename the endpoints key to its loadgen equivalent when mapped.
                    original_key = record.get("key")
                    if original_key in ENDPOINTS_TO_LOADGEN_KEY_MAPPING:
                        record["key"] = ENDPOINTS_TO_LOADGEN_KEY_MAPPING[original_key]
                    output_file.write(
                        f"{marker} {json.dumps(record, separators=(',', ':'))}\n"
                    )
                else:
                    logger.warning(f"Found invalid line {line}, skipping.")

        logger.info(f"Generated mlperf_log_details.txt at {log_details_path}")

    except Exception as e:
        logger.error(f"Failed to generate mlperf_log_details.txt: {e}")
        raise
86 changes: 86 additions & 0 deletions src/inference_endpoint/config/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Global constants and mappings for the inference endpoint package."""

# Mapping from endpoints results keys to MLPerf loadgen and submission checker supported keys
# This ensures compatibility when generating user.conf and mlperf_log_details.txt for submission checker
# Format: {"endpoints_key": "loadgen_key"}
# Keys on the left are produced by the endpoints benchmark reports; values on
# the right are the names loadgen / the MLPerf submission checker expect.
ENDPOINTS_TO_LOADGEN_KEY_MAPPING = {
    # --- Build / provenance metadata ---
    "endpoints_version": "loadgen_version",
    "endpoints_git_commit_date": "loadgen_git_commit_date",
    "endpoints_git_commit_hash": "loadgen_git_commit_hash",
    "test_datetime": "test_datetime",
    # --- QSL sample counts ---
    "n_samples_issued": "qsl_reported_total_count",
    "n_samples_from_dataset": "qsl_reported_performance_count",
    # --- Scenario / mode ---
    "effective_scenario": "effective_scenario",
    "mode": "effective_test_mode",
    "streaming": "streaming",
    "output_sequence_lengths.min": "min_output_tokens",
    "output_sequence_lengths.max": "max_output_tokens",
    "load_pattern": "load_pattern",
    # --- Durations, query counts, and RNG seeds ---
    "min_duration_ms": "effective_min_duration_ms",
    "max_duration_ms": "effective_max_duration_ms",
    "effective_target_duration_ms": "effective_target_duration_ms",
    "min_sample_count": "effective_min_query_count",
    "effective_sample_index_rng_seed": "effective_sample_index_rng_seed",
    "effective_schedule_rng_seed": "effective_schedule_rng_seed",
    "effective_sample_concatenate_permutation": "effective_sample_concatenate_permutation",
    "effective_samples_per_query": "effective_samples_per_query",
    "generated_query_count": "generated_query_count",
    "generated_query_duration": "generated_query_duration",
    # --- Throughput ---
    "target_qps": "effective_target_qps",  # (results_summary.json)
    "result_scheduled_samples_per_sec": "result_scheduled_samples_per_sec",
    "qps": "result_completed_samples_per_sec",
    "results_sample_per_second": "results_sample_per_second",
    "effective_max_concurrency": "effective_max_async_queries",
    # --- End-to-end latency targets and percentiles (ns) ---
    "effective_target_latency_ns": "effective_target_latency_ns",
    "effective_target_latency_percentile": "effective_target_latency_percentile",
    "latency.min": "result_min_latency_ns",
    "latency.max": "result_max_latency_ns",
    "latency.avg": "result_mean_latency_ns",
    "latency.percentiles.50": "result_50.00_percentile_latency_ns",
    "latency.percentiles.90": "result_90.00_percentile_latency_ns",
    "latency.percentiles.95": "result_95.00_percentile_latency_ns",
    "latency.percentiles.99": "result_99.00_percentile_latency_ns",
    "latency.percentiles.99.9": "result_99.90_percentile_latency_ns",
    # --- Time-to-first-token (TTFT) latency (ns) ---
    "ttft.min": "result_first_token_min_latency_ns",
    "ttft.max": "result_first_token_max_latency_ns",
    "ttft.avg": "result_first_token_mean_latency_ns",
    "ttft.percentiles.50": "result_first_token_50.00_percentile_latency_ns",
    "ttft.percentiles.90": "result_first_token_90.00_percentile_latency_ns",
    "ttft.percentiles.95": "result_first_token_95.00_percentile_latency_ns",
    "ttft.percentiles.99": "result_first_token_99.00_percentile_latency_ns",
    "ttft.percentiles.99.9": "result_first_token_99.90_percentile_latency_ns",
    # --- Time-per-output-token (TPOT) latency (ns) ---
    "tpot.percentiles.50": "result_time_per_output_token_50.00_percentile_ns",
    "tpot.percentiles.90": "result_time_per_output_token_90.00_percentile_ns",
    "tpot.percentiles.95": "result_time_per_output_token_95.00_percentile_ns",
    "tpot.percentiles.99": "result_time_per_output_token_99.00_percentile_ns",
    "tpot.percentiles.99.9": "result_time_per_output_token_99.90_percentile_ns",
    "tpot.min": "result_time_to_output_token_min",
    "tpot.max": "result_time_to_output_token_max",
    "tpot.avg": "result_time_to_output_token_mean",
    "tps": "result_completed_tokens_per_second",
    # --- Result validity / constraint flags ---
    "result_validity": "result_validity",
    "result_perf_constraints_met": "result_perf_constraints_met",
    "result_min_duration_met": "result_min_duration_met",
    "result_min_queries_met": "result_min_queries_met",
    "early_stopping_met": "early_stopping_met",
    "early_stopping_ttft_result": "early_stopping_ttft_result",
    "early_stopping_tpot_result": "early_stopping_tpot_result",
    "result.total": "result_query_count",
    "result_overlatency_query_count": "result_overlatency_query_count",
    "result.failed": "num_errors",
}
1 change: 1 addition & 0 deletions src/inference_endpoint/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ class BenchmarkConfig(BaseModel):
report_dir: Path | None = None
timeout: float | None = None
verbose: bool = False
ensure_submission_checker_compatibility: bool = False
# CPU affinity for loadgen and worker processes:
# - True = auto (compute optimal NUMA-aware plan)
# - False = disabled (no CPU pinning)
Expand Down
Loading
Loading