Skip to content

Commit 92c8386

Browse files
authored
Adding option to disable DCGM when in remote mode (#952)
* Adding option to disable DCGM when in remote mode. * Updating documentation
1 parent a66700f commit 92c8386

File tree

9 files changed

+128
-12
lines changed

9 files changed

+128
-12
lines changed

docs/config.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,9 @@ cpu_only_composing_models: <comma-delimited-string-list>
188188
# List of GPU UUIDs to be used for the profiling. Use 'all' to profile all the GPUs visible by CUDA
189189
[ gpus: <string|comma-delimited-list-string> | default: 'all' ]
190190
191+
# Disables DCGM (used to verify info about GPUs)
192+
[ dcgm_disable: <bool> | default: false ]
193+
191194
# Search mode. Options are "brute", "quick", and "optuna"
192195
[ run_config_search_mode: <string> | default: brute]
193196

model_analyzer/analyzer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,11 @@ def _create_model_manager(self, client, gpus):
214214
def _get_server_only_metrics(self, client, gpus):
215215
if self._config.triton_launch_mode != "c_api":
216216
if not self._state_manager._starting_fresh_run:
217-
if self._do_checkpoint_gpus_match(gpus):
217+
if self._config.dcgm_disable:
218+
logger.info(
219+
"DCGM is disabled - cannot verify that GPU devices match checkpoint"
220+
)
221+
elif self._do_checkpoint_gpus_match(gpus):
218222
logger.info(
219223
"GPU devices match checkpoint - skipping server metric acquisition"
220224
)

model_analyzer/config/input/config_command.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,7 @@ def _check_for_illegal_config_settings(
129129
self._check_for_bls_incompatibility(args, yaml_config)
130130
self._check_for_concurrency_rate_request_conflicts(args, yaml_config)
131131
self._check_for_config_search_rate_request_conflicts(args, yaml_config)
132+
self._check_for_dcgm_disable_launch_mode_conflict(args, yaml_config)
132133

133134
def _set_field_values(
134135
self, args: Namespace, yaml_config: Optional[Dict[str, List]]
@@ -398,6 +399,19 @@ def _check_for_config_search_rate_request_conflicts(
398399
f"\nCannot have both `run-config-search-max-request-rate` and `run-config-search-min/max-concurrency` specified in the config/CLI."
399400
)
400401

402+
def _check_for_dcgm_disable_launch_mode_conflict(
403+
self, args: Namespace, yaml_config: Optional[Dict[str, List]]
404+
) -> None:
405+
if self._get_config_value("dcgm_disable", args, yaml_config):
406+
launch_mode = self._get_config_value(
407+
"triton_launch_mode", args, yaml_config
408+
)
409+
410+
if launch_mode != "remote":
411+
raise TritonModelAnalyzerException(
412+
f"\nIf `dcgm-disable` then `triton-launch-mode` must be set to remote"
413+
)
414+
401415
def _preprocess_and_verify_arguments(self):
402416
"""
403417
Enforces some rules on the config.

model_analyzer/config/input/config_command_profile.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
DEFAULT_CLIENT_PROTOCOL,
4646
DEFAULT_COLLECT_CPU_METRICS,
4747
DEFAULT_CONCURRENCY_SWEEP_DISABLE,
48+
DEFAULT_DCGM_DISABLE,
4849
DEFAULT_DURATION_SECONDS,
4950
DEFAULT_EXPORT_PATH,
5051
DEFAULT_FILENAME_MODEL_GPU,
@@ -288,6 +289,16 @@ def _fill_config(self):
288289
description="Report GPU metrics, even when the model is `cpu_only`.",
289290
)
290291
)
292+
self._add_config(
293+
ConfigField(
294+
"dcgm_disable",
295+
field_type=ConfigPrimitive(bool),
296+
flags=["--dcgm-disable"],
297+
parser_args={"action": "store_true"},
298+
default_value=DEFAULT_DCGM_DISABLE,
299+
description="Disables DCGM, which prevents obtaining information about GPUs",
300+
)
301+
)
291302
self._add_config(
292303
ConfigField(
293304
"skip_summary_reports",

model_analyzer/config/input/config_defaults.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@
6262
DEFAULT_USE_CONCURRENCY_FORMULA = False
6363
DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
6464
DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
65+
DEFAULT_DCGM_DISABLE = False
6566
DEFAULT_TRITON_LAUNCH_MODE = "local"
6667
DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.11-py3"
6768
DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"

model_analyzer/entrypoint.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,10 @@ def main():
260260
)
261261

262262
# Set up devices
263-
gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)
263+
if config.dcgm_disable:
264+
gpus = []
265+
else:
266+
gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)
264267

265268
# Check/create output model repository
266269
create_output_model_repository(config)

model_analyzer/triton/server/server_factory.py

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,9 @@ def get_server_handle(config, gpus, use_model_repository=False):
126126
"""
127127

128128
if config.triton_launch_mode == "remote":
129-
server = TritonServerFactory._get_remote_server_handle(config)
129+
server = TritonServerFactory._get_remote_server_handle(
130+
config, print_warning_message=use_model_repository
131+
)
130132
elif config.triton_launch_mode == "local":
131133
server = TritonServerFactory._get_local_server_handle(
132134
config, gpus, use_model_repository=True
@@ -147,22 +149,23 @@ def get_server_handle(config, gpus, use_model_repository=False):
147149
return server
148150

149151
@staticmethod
150-
def _get_remote_server_handle(config):
152+
def _get_remote_server_handle(config, print_warning_message=True):
151153
triton_config = TritonServerConfig()
152154
triton_config.update_config(config.triton_server_flags)
153155
triton_config["model-repository"] = "remote-model-repository"
154156
logger.info("Using remote Triton Server")
155157
server = TritonServerFactory.create_server_local(
156158
path=None, config=triton_config, gpus=[], log_path=""
157159
)
158-
logger.warning(
159-
"GPU memory metrics reported in the remote mode are not"
160-
" accurate. Model Analyzer uses Triton explicit model control to"
161-
" load/unload models. Some frameworks do not release the GPU"
162-
" memory even when the memory is not being used. Consider"
163-
' using the "local" or "docker" mode if you want to accurately'
164-
" monitor the GPU memory usage for different models."
165-
)
160+
if print_warning_message:
161+
logger.warning(
162+
"GPU memory metrics reported in the remote mode are not"
163+
" accurate. Model Analyzer uses Triton explicit model control to"
164+
" load/unload models. Some frameworks do not release the GPU"
165+
" memory even when the memory is not being used. Consider"
166+
' using the "local" or "docker" mode if you want to accurately'
167+
" monitor the GPU memory usage for different models."
168+
)
166169

167170
return server
168171

tests/test_cli.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ def get_test_options():
6767
OptionStruct("bool", "profile","--always-report-gpu-metrics"),
6868
OptionStruct("bool", "profile","--use-concurrency-formula"),
6969
OptionStruct("bool", "profile","--concurrency-sweep-disable"),
70+
OptionStruct("bool", "profile","--dcgm-disable"),
7071

7172

7273
#Int/Float options

tests/test_config.py

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2388,6 +2388,82 @@ def test_model_type_llm(self):
23882388
config.inference_output_fields, DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS
23892389
)
23902390

2391+
def test_dcgm_disable_and_launch_mode(self):
2392+
"""
2393+
Test that launch mode is set to remote when dcgm is disabled
2394+
"""
2395+
2396+
# Should raise an exception for docker, local, and c_api launch modes
2397+
args = [
2398+
"model-analyzer",
2399+
"profile",
2400+
"--profile-models",
2401+
"modelA",
2402+
"--model-repository",
2403+
"cli-repository",
2404+
"-f",
2405+
"path-to-config-file",
2406+
"--dcgm-disable",
2407+
"--triton-launch-mode",
2408+
"docker",
2409+
]
2410+
2411+
yaml_content = ""
2412+
2413+
with self.assertRaises(TritonModelAnalyzerException):
2414+
self._evaluate_config(args, yaml_content, subcommand="profile")
2415+
2416+
args = [
2417+
"model-analyzer",
2418+
"profile",
2419+
"--profile-models",
2420+
"modelA",
2421+
"--model-repository",
2422+
"cli-repository",
2423+
"-f",
2424+
"path-to-config-file",
2425+
"--dcgm-disable",
2426+
"--triton-launch-mode",
2427+
"local",
2428+
]
2429+
2430+
with self.assertRaises(TritonModelAnalyzerException):
2431+
self._evaluate_config(args, yaml_content, subcommand="profile")
2432+
2433+
args = [
2434+
"model-analyzer",
2435+
"profile",
2436+
"--profile-models",
2437+
"modelA",
2438+
"--model-repository",
2439+
"cli-repository",
2440+
"-f",
2441+
"path-to-config-file",
2442+
"--dcgm-disable",
2443+
"--triton-launch-mode",
2444+
"c_api",
2445+
]
2446+
2447+
with self.assertRaises(TritonModelAnalyzerException):
2448+
self._evaluate_config(args, yaml_content, subcommand="profile")
2449+
2450+
# Should not raise an exception for remote mode
2451+
args = [
2452+
"model-analyzer",
2453+
"profile",
2454+
"--profile-models",
2455+
"modelA",
2456+
"--model-repository",
2457+
"cli-repository",
2458+
"-f",
2459+
"path-to-config-file",
2460+
"--dcgm-disable",
2461+
"--triton-launch-mode",
2462+
"remote",
2463+
]
2464+
2465+
_ = self._evaluate_config(args, yaml_content, subcommand="profile")
2466+
23912467
def _test_request_rate_config_conflicts(
23922468
self, base_args: List[Any], yaml_content: str
23932469
) -> None:

0 commit comments

Comments (0)