Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/inference_endpoint/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,11 @@ def _add_shared_benchmark_args(parser):
parser.add_argument(
"--report-dir", type=Path, help="Path to save detailed benchmark report"
)
parser.add_argument(
"--ensure-submission-checker-compatibility",
action="store_true",
help="Enable loadgen compatibility mode for submission checker",
)


def _add_online_specific_args(parser):
Expand Down
10 changes: 9 additions & 1 deletion src/inference_endpoint/commands/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
from transformers import AutoTokenizer
from transformers.utils import logging as transformers_logging

from inference_endpoint.commands.utils import get_default_report_path
from inference_endpoint.commands.utils import (
generate_user_conf_submission_checker,
get_default_report_path,
)
from inference_endpoint.config.runtime_settings import RuntimeSettings
from inference_endpoint.config.schema import (
BenchmarkConfig,
Expand Down Expand Up @@ -688,6 +691,11 @@ def signal_handler(signum, frame):
except Exception as e:
logger.error(f"Save failed: {e}")

if config.ensure_submission_checker_compatibility:
# convert the runtime_settings.json to user.conf format and
# result_summary.json to mlperf_log_details.txt format(TODO)
generate_user_conf_submission_checker(report_dir)

except KeyboardInterrupt:
logger.warning("Benchmark interrupted by user")
# Will be re-raised by CLI main() for proper exit
Expand Down
36 changes: 36 additions & 0 deletions src/inference_endpoint/commands/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from pydantic import ValidationError as PydanticValidationError

from .. import __version__
from ..config.constants import ENDPOINTS_TO_LOADGEN_KEY_MAPPING
from ..config.schema import TEMPLATE_TYPE_MAP, BenchmarkConfig
from ..config.yaml_loader import ConfigError, ConfigLoader
from ..exceptions import InputValidationError, SetupError
Expand Down Expand Up @@ -314,3 +315,38 @@ def get_default_report_path() -> Path:
return Path(
f"{tempfile.gettempdir()}/reports_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
)


def generate_user_conf_submission_checker(report_dir: Path) -> None:
    """Generate a ``user.conf`` file for the MLPerf submission checker.

    Reads ``runtime_settings.json`` from *report_dir*, translates each
    endpoints key to its loadgen equivalent via
    ``ENDPOINTS_TO_LOADGEN_KEY_MAPPING`` (keys without a mapping are passed
    through unchanged), and writes one ``*.*.<key>=<value>`` line per
    setting to ``user.conf`` in the same directory, overwriting any
    existing file.

    Args:
        report_dir: Path to the report directory containing
            ``runtime_settings.json``.

    Raises:
        FileNotFoundError: If ``runtime_settings.json`` does not exist in
            ``report_dir``. All other failures (unreadable file, invalid
            JSON, write errors) are logged and suppressed so that a
            reporting problem never aborts a completed benchmark run.
    """
    # Local import: the file is JSON, so parse it with the json module
    # instead of relying on JSON being a subset of YAML.
    import json

    runtime_settings_path = report_dir / "runtime_settings.json"
    user_conf_path = report_dir / "user.conf"

    if not runtime_settings_path.exists():
        logger.error(f"runtime_settings.json not found in {report_dir}")
        raise FileNotFoundError(f"runtime_settings.json not found in {report_dir}")
    try:
        with open(runtime_settings_path) as f:
            runtime_settings = json.load(f)

        # Guard against a valid-JSON-but-wrong-shape payload (e.g. a list
        # or null); iterating .items() on those would fail confusingly.
        if not isinstance(runtime_settings, dict):
            raise ValueError(
                f"Expected a JSON object in {runtime_settings_path}, "
                f"got {type(runtime_settings).__name__}"
            )

        with open(user_conf_path, "w") as f:
            for key, value in runtime_settings.items():
                # Map endpoints key to loadgen key if mapping exists, otherwise use same key
                loadgen_key = ENDPOINTS_TO_LOADGEN_KEY_MAPPING.get(key, key)
                f.write(f"*.*.{loadgen_key}={value}\n")

        logger.info(f"Generated user.conf at {user_conf_path}")

    except Exception as e:
        # Deliberate best-effort: compatibility output must not crash the
        # caller after a benchmark has finished, so log and continue.
        logger.error(f"Failed to generate user.conf: {e}")
28 changes: 28 additions & 0 deletions src/inference_endpoint/config/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Global constants and mappings for the inference endpoint package."""

# Mapping from endpoints results keys to MLPerf loadgen and submission checker supported keys
# This ensures compatibility when generating user.conf and mlperf_log_details.txt for submission checker
# Format: {"endpoints_key": "loadgen_key"}
# NOTE: keys absent from this mapping are written to user.conf under their
# original endpoints name (see generate_user_conf_submission_checker).
ENDPOINTS_TO_LOADGEN_KEY_MAPPING = {
    "n_samples_from_dataset": "qsl_reported_performance_count",
    # TODO: confirm the loadgen equivalents for these keys before enabling:
    # "n_samples_to_issue": "",
    # "total_samples_to_issue": "",
    "max_duration_ms": "effective_max_duration_ms",
    "min_duration_ms": "effective_min_duration_ms",
    "min_sample_count": "effective_min_query_count",
}
1 change: 1 addition & 0 deletions src/inference_endpoint/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ class BenchmarkConfig(BaseModel):
report_dir: Path | None = None
timeout: int | None = None
verbose: bool = False
ensure_submission_checker_compatibility: bool = True

@classmethod
def from_yaml_file(cls, path: Path) -> BenchmarkConfig:
Expand Down
160 changes: 160 additions & 0 deletions tests/unit/commands/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@
up and validating their benchmark configurations.
"""

import json
from pathlib import Path
from unittest.mock import MagicMock

import pytest
from inference_endpoint import __version__
from inference_endpoint.commands.utils import (
generate_user_conf_submission_checker,
run_info_command,
run_init_command,
run_validate_command,
Expand Down Expand Up @@ -195,3 +197,161 @@ async def test_init_all_templates(self, tmp_path):

assert output_file.exists()
assert output_file.stat().st_size > 0


class TestGenerateUserConfSubmissionChecker:
    """Test user.conf generation for submission checker.

    Validates that the user.conf file is generated correctly with proper
    key mapping from endpoints runtime settings to MLPerf loadgen format.
    This is critical for submission checker compatibility.
    """

    @pytest.fixture
    def sample_runtime_settings(self):
        """Sample runtime settings data for testing."""
        return dict(
            n_samples_from_dataset=1000,
            n_samples_to_issue=500,
            total_samples_to_issue=500,
            max_duration_ms=60000,
            min_duration_ms=30000,
            min_sample_count=100,
            scheduler_random_seed=42,
            dataloader_random_seed=123,
        )

    @pytest.fixture
    def report_dir_with_settings(self, tmp_path, sample_runtime_settings):
        """Create a report directory with runtime_settings.json."""
        target = tmp_path / "test_report"
        target.mkdir()
        (target / "runtime_settings.json").write_text(
            json.dumps(sample_runtime_settings)
        )
        return target

    def test_generate_user_conf_success(self, report_dir_with_settings):
        """Test successful user.conf generation."""
        generate_user_conf_submission_checker(report_dir_with_settings)

        conf_file = report_dir_with_settings / "user.conf"
        assert conf_file.exists(), "user.conf file should be created"

        # Every line must follow the "*.*.<key>=<value>" loadgen format.
        entries = conf_file.read_text().strip().split("\n")
        assert (
            len(entries) > 0
        ), "user.conf should not be empty when runtime_settings exists with data"

        for entry in entries:
            assert "=" in entry, f"Line should contain '=' but got: {entry}"
            key_part, _, value_part = entry.partition("=")
            segments = key_part.split(".")
            assert (
                len(segments) == 3
            ), f"Key should have format '<text>.<text>.<text>' but got: {key_part}"
            assert all(
                segments
            ), f"Each part in key should be non-empty but got: {key_part}"
            assert value_part, f"Value should not be empty: {entry}"

    def test_missing_runtime_settings_file(self, tmp_path):
        """Test error handling when runtime_settings.json is missing."""
        bare_dir = tmp_path / "empty_report"
        bare_dir.mkdir()

        with pytest.raises(
            FileNotFoundError, match=f"runtime_settings.json not found in {bare_dir}"
        ):
            generate_user_conf_submission_checker(bare_dir)

        # No partial output may be left behind on failure.
        assert not (
            bare_dir / "user.conf"
        ).exists(), "user.conf should not be created when runtime_settings.json is missing"

    def test_empty_runtime_settings(self, tmp_path):
        """Test handling of empty runtime settings."""
        workdir = tmp_path / "empty_settings_report"
        workdir.mkdir()
        (workdir / "runtime_settings.json").write_text(json.dumps({}))

        # An empty settings object is valid and yields an empty user.conf.
        generate_user_conf_submission_checker(workdir)

        conf_file = workdir / "user.conf"
        assert (
            conf_file.exists()
        ), "user.conf should be created even with empty settings"
        assert (
            not conf_file.read_text().strip()
        ), "user.conf should be empty when runtime_settings is empty"

    def test_user_conf_with_unmapped_keys(self, tmp_path):
        """Test that unmapped keys are included with their original names."""
        workdir = tmp_path / "unmapped_report"
        workdir.mkdir()

        # Mix one mapped key with keys that have no loadgen mapping.
        payload = {
            "n_samples_from_dataset": 1000,  # mapped to qsl_reported_performance_count
            "custom_key": "custom_value",  # should remain as-is
            "another_setting": 42,  # should remain as-is
        }
        (workdir / "runtime_settings.json").write_text(json.dumps(payload))

        generate_user_conf_submission_checker(workdir)

        output = (workdir / "user.conf").read_text()

        # Mapped key appears under its loadgen name.
        assert "*.*.qsl_reported_performance_count=1000" in output

        # Unmapped keys keep their original names.
        assert "*.*.custom_key=custom_value" in output
        assert "*.*.another_setting=42" in output

    def test_user_conf_overwrites_existing(self, report_dir_with_settings):
        """Test that generating user.conf overwrites existing file."""
        conf_file = report_dir_with_settings / "user.conf"

        # Seed a stale user.conf that must be fully replaced.
        conf_file.write_text("*.*.old_key=old_value\n")

        generate_user_conf_submission_checker(report_dir_with_settings)

        refreshed = conf_file.read_text()

        # Stale entries are gone...
        assert "old_key" not in refreshed
        assert "old_value" not in refreshed

        # ...replaced by freshly generated content.
        assert "*.*.qsl_reported_performance_count=1000" in refreshed