
Commit d370622

feat: use configmap instead of PVC for profiling results (#3981)

Authored by tedzhouhk and hhzhang16

Signed-off-by: hongkuanz <[email protected]>
Signed-off-by: Hannah Zhang <[email protected]>
Co-authored-by: Hannah Zhang <[email protected]>

1 parent 51c103b · commit d370622

File tree: 7 files changed, +653 -156 lines

benchmarks/profiler/profile_sla.py
Lines changed: 6 additions & 4 deletions

@@ -22,8 +22,8 @@
 import yaml
 
 from benchmarks.profiler.utils.aiperf import benchmark_decode, benchmark_prefill
-from benchmarks.profiler.utils.config import generate_dgd_config_with_planner
 from benchmarks.profiler.utils.config_modifiers import CONFIG_MODIFIERS
+from benchmarks.profiler.utils.dgd_generation import generate_dgd_config_with_planner
 from benchmarks.profiler.utils.estimate_perf import AIConfiguratorPerfEstimator
 from benchmarks.profiler.utils.plot import (
     plot_decode_performance,
@@ -92,7 +92,6 @@ async def run_profile(args):
     with open(args.config, "r") as f:
         config = yaml.safe_load(f)
 
-    config = config_modifier.update_model(config, args.model)
     if args.dgd_image:
         config = config_modifier.update_image(config, args.dgd_image)
         logger.info(f"Using DGD image: {args.dgd_image}")
@@ -741,9 +740,12 @@ async def run_profile(args):
         )
         logger.info(f"Final DGD config with planner: {config}")
 
-        # save DGD config with planner
+        # save DGD config with planner; support multi-document output when a ConfigMap is included
         with open(f"{args.output_dir}/config_with_planner.yaml", "w") as f:
-            yaml.dump(config, f)
+            if isinstance(config, list):
+                yaml.dump_all(config, f)
+            else:
+                yaml.dump(config, f)
 
     except Exception as e:
         logger.error(f"Profile job failed with error: {e}")
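
With this change, generate_dgd_config_with_planner may return either a single DGD dict or a [ConfigMap, DGD] pair, so config_with_planner.yaml can hold one or two YAML documents. A minimal sketch of reading the file back, using a hypothetical output path (nothing below is part of this commit):

import yaml

# The profiler output may be a single- or multi-document YAML file.
with open("profile_results/config_with_planner.yaml") as f:  # hypothetical path
    docs = list(yaml.safe_load_all(f))

# Tell the documents apart by their Kubernetes kind.
config_map = next((d for d in docs if d.get("kind") == "ConfigMap"), None)
dgd = next(d for d in docs if d.get("kind") != "ConfigMap")

print("ConfigMap included:", config_map is not None)
print("Planner service present:", "Planner" in dgd["spec"]["services"])

Since kubectl apply -f accepts multi-document YAML, both objects can be applied from the one file.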

benchmarks/profiler/utils/config.py
Lines changed: 2 additions & 124 deletions

@@ -19,10 +19,8 @@
 import shlex
 from typing import Literal, Optional, Protocol
 
-import yaml
 from pydantic import BaseModel
 
-from benchmarks.profiler.utils.planner_utils import build_planner_args_from_namespace
 from dynamo.common.utils.paths import get_workspace_dir
 from dynamo.planner.defaults import WORKER_COMPONENT_NAMES, SubComponentType
 
@@ -104,7 +102,8 @@ class DgdPlannerServiceConfig(BaseModel):
     dynamoNamespace: str = "dynamo"  # placeholder
     componentType: str = "planner"
     replicas: int = 1
-    volumeMounts: list[VolumeMount] = [VolumeMount()]
+    # Do not attach PVC; we'll mount a ConfigMap for planner data instead.
+    volumeMounts: list[VolumeMount] = []
     extraPodSpec: PodSpec = PodSpec(
         mainContainer=Container(
             image="my-registry/dynamo-runtime:my-tag",  # placeholder
@@ -445,124 +444,3 @@ def update_model(cls, config: dict, model_name: str) -> dict:
     @classmethod
     def update_image(cls, config: dict, image: str) -> dict:
         ...
-
-
-def generate_dgd_config_with_planner(
-    config_path: str,
-    config_modifier,
-    best_prefill_gpus: int,
-    best_decode_gpus: int,
-    output_dir: str,
-    args,
-    is_moe_model: bool = False,
-    num_gpus_per_node: int = 8,
-):
-    """Generate DGD config with planner based on profiling results.
-
-    Args:
-        config_path: Path to the YAML config file
-        config_modifier: Config modifier instance (e.g., SGLangConfigModifier)
-        best_prefill_gpus: Number of GPUs for prefill engine
-        best_decode_gpus: Number of GPUs for decode engine
-        output_dir: Output directory for profile results
-        args: Parsed arguments namespace from profile_sla
-        is_moe_model: Whether this is an MoE model
-        num_gpus_per_node: Number of GPUs per node (for MoE models)
-
-    Returns:
-        dict: Final DGD config with planner service configured
-    """
-
-    # Load config from file
-    with open(config_path, "r") as f:
-        config = yaml.safe_load(f)
-
-    # Update model name in config from profiling args
-    # This ensures the final DGD uses the model specified in the DGDR, not the default in the config file
-    config = config_modifier.update_model(config, args.model)
-
-    # Update container image if provided
-    # This overrides the default image in the config file for all DGD components
-    if args.dgd_image:
-        config = config_modifier.update_image(config, args.dgd_image)
-
-    if not is_moe_model:
-        # dense model, use TP for both prefill and decode
-        config = config_modifier.set_config_tp_size(
-            config, best_prefill_gpus, SubComponentType.PREFILL
-        )
-        config = config_modifier.set_config_tp_size(
-            config, best_decode_gpus, SubComponentType.DECODE
-        )
-    else:
-        # MoE model, use TEP for prefill and DEP for decode
-        config = config_modifier.set_config_tep_size(
-            config,
-            best_prefill_gpus,
-            num_gpus_per_node,
-            SubComponentType.PREFILL,
-        )
-        config = config_modifier.set_config_dep_size(
-            config,
-            best_decode_gpus,
-            num_gpus_per_node,
-            SubComponentType.DECODE,
-        )
-    config = Config.model_validate(config)
-
-    # add PVC config if not present
-    if not config.spec.pvcs:
-        config.spec.pvcs = [PVCConfig()]
-
-    # add the planner service
-    planner_config = DgdPlannerServiceConfig()
-    frontend_service = config.spec.services["Frontend"]
-    planner_config.dynamoNamespace = getattr(frontend_service, "dynamoNamespace", "dynamo")  # type: ignore[attr-defined]
-    if frontend_service.extraPodSpec and frontend_service.extraPodSpec.mainContainer:
-        frontend_image = frontend_service.extraPodSpec.mainContainer.image
-        if frontend_image and planner_config.extraPodSpec.mainContainer:
-            planner_config.extraPodSpec.mainContainer.image = frontend_image
-
-    # Build planner args dynamically from parsed arguments
-    # This includes shared args (ttft, itl, backend, namespace) from profile_sla
-    # and planner-specific args (with planner_ prefix)
-    planner_args = build_planner_args_from_namespace(args, prefix="planner_")
-
-    # Override profiling-specific arguments with results from profiling
-    # Remove and re-add to ensure correct values from profiling context
-    planner_args = [
-        arg
-        for arg in planner_args
-        if not any(
-            arg.startswith(f"--{key}=")
-            for key in [
-                "namespace",
-                "prefill-engine-num-gpu",
-                "decode-engine-num-gpu",
-                "profile-results-dir",
-            ]
-        )
-    ]
-
-    # Add arguments determined by profiling results
-    frontend_namespace = getattr(config.spec.services["Frontend"], "dynamoNamespace", "dynamo")  # type: ignore[attr-defined]
-    planner_args.extend(
-        [
-            f"--namespace={frontend_namespace}",
-            f"--prefill-engine-num-gpu={best_prefill_gpus}",
-            f"--decode-engine-num-gpu={best_decode_gpus}",
-            f"--profile-results-dir={output_dir}",
-        ]
-    )
-
-    if (
-        planner_config.extraPodSpec.mainContainer
-        and planner_config.extraPodSpec.mainContainer.args is not None
-    ):
-        planner_config.extraPodSpec.mainContainer.args.extend(planner_args)
-    # Convert planner config to dict first, then the entire config to dict
-    planner_dict = planner_config.model_dump(exclude_unset=False)
-    config_dict = config.model_dump(exclude_unset=False)
-    config_dict["spec"]["services"]["Planner"] = planner_dict
-
-    return config_dict
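
After this diff, the planner service model starts with an empty volumeMounts list; the ConfigMap volume and mount are attached later during DGD generation. A quick sanity-check sketch, assuming the repo is importable on PYTHONPATH (nothing below is part of this commit):

from benchmarks.profiler.utils.config import DgdPlannerServiceConfig

planner = DgdPlannerServiceConfig()

# No PVC-backed mount is attached by default anymore.
assert planner.volumeMounts == []

# The placeholder image is still set; generate_dgd_config_with_planner later
# overrides it with the Frontend container image.
print(planner.extraPodSpec.mainContainer.image)
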
benchmarks/profiler/utils/dgd_generation.py (new file)
Lines changed: 220 additions & 0 deletions

@@ -0,0 +1,220 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+from typing import Optional
+
+import numpy as np
+import yaml
+
+from benchmarks.profiler.utils.config import Config, DgdPlannerServiceConfig
+from benchmarks.profiler.utils.planner_utils import build_planner_args_from_namespace
+from dynamo.common.utils.paths import get_workspace_dir
+from dynamo.planner.defaults import SubComponentType
+
+
+def generate_dgd_config_with_planner(
+    config_path: str,
+    config_modifier,
+    best_prefill_gpus: int,
+    best_decode_gpus: int,
+    output_dir: str,
+    args,
+    is_moe_model: bool = False,
+    num_gpus_per_node: int = 8,
+):
+    """Generate DGD config with planner based on profiling results.
+
+    Args:
+        config_path: Path to the YAML config file
+        config_modifier: Config modifier instance (e.g., SGLangConfigModifier)
+        best_prefill_gpus: Number of GPUs for prefill engine
+        best_decode_gpus: Number of GPUs for decode engine
+        output_dir: Output directory for profile results
+        args: Parsed arguments namespace from profile_sla
+        is_moe_model: Whether this is an MoE model
+        num_gpus_per_node: Number of GPUs per node (for MoE models)
+
+    Returns:
+        list[dict] | dict: If a ConfigMap is generated for planner data, returns a list
+        of two YAML documents [ConfigMap, DGD]; otherwise returns a single DGD dict.
+    """
+
+    # Load config from file
+    with open(config_path, "r") as f:
+        config = yaml.safe_load(f)
+
+    # Update container image if provided
+    # This overrides the default image in the config file for all DGD components
+    if args.dgd_image:
+        config = config_modifier.update_image(config, args.dgd_image)
+
+    if not is_moe_model:
+        # dense model, use TP for both prefill and decode
+        config = config_modifier.set_config_tp_size(
+            config, best_prefill_gpus, SubComponentType.PREFILL
+        )
+        config = config_modifier.set_config_tp_size(
+            config, best_decode_gpus, SubComponentType.DECODE
+        )
+    else:
+        # MoE model, use TEP for prefill and DEP for decode
+        config = config_modifier.set_config_tep_size(
+            config,
+            best_prefill_gpus,
+            num_gpus_per_node,
+            SubComponentType.PREFILL,
+        )
+        config = config_modifier.set_config_dep_size(
+            config,
+            best_decode_gpus,
+            num_gpus_per_node,
+            SubComponentType.DECODE,
+        )
+    config = Config.model_validate(config)
+
+    # add the planner service
+    planner_config = DgdPlannerServiceConfig()
+    frontend_service = config.spec.services["Frontend"]
+    planner_config.dynamoNamespace = getattr(frontend_service, "dynamoNamespace", "dynamo")  # type: ignore[attr-defined]
+    if frontend_service.extraPodSpec and frontend_service.extraPodSpec.mainContainer:
+        frontend_image = frontend_service.extraPodSpec.mainContainer.image
+        if frontend_image and planner_config.extraPodSpec.mainContainer:
+            planner_config.extraPodSpec.mainContainer.image = frontend_image
+
+    # Build planner args dynamically from parsed arguments
+    # This includes shared args (ttft, itl, backend, namespace) from profile_sla
+    # and planner-specific args (with planner_ prefix)
+    planner_args = build_planner_args_from_namespace(args, prefix="planner_")
+
+    # Override profiling-specific arguments with results from profiling
+    # Remove and re-add to ensure correct values from profiling context
+    planner_args = [
+        arg
+        for arg in planner_args
+        if not any(
+            arg.startswith(f"--{key}=")
+            for key in [
+                "namespace",
+                "prefill-engine-num-gpu",
+                "decode-engine-num-gpu",
+                "profile-results-dir",
+            ]
+        )
+    ]
+
+    # Add arguments determined by profiling results
+    frontend_namespace = getattr(config.spec.services["Frontend"], "dynamoNamespace", "dynamo")  # type: ignore[attr-defined]
+    cm_mount_path = f"{get_workspace_dir()}/profiling_results"
+    planner_args.extend(
+        [
+            f"--namespace={frontend_namespace}",
+            f"--prefill-engine-num-gpu={best_prefill_gpus}",
+            f"--decode-engine-num-gpu={best_decode_gpus}",
+            f"--profile-results-dir={cm_mount_path}",
+        ]
+    )
+
+    if (
+        planner_config.extraPodSpec.mainContainer
+        and planner_config.extraPodSpec.mainContainer.args is not None
+    ):
+        planner_config.extraPodSpec.mainContainer.args.extend(planner_args)
+    # Convert planner config to dict first, then the entire config to dict
+    planner_dict = planner_config.model_dump(exclude_unset=False)
+    config_dict = config.model_dump(exclude_unset=False)
+
+    # Build a ConfigMap from NPZ profiling outputs and mount it into the Planner
+    # We store data as plain JSON (lists/float/int) to avoid binary artifacts.
+    prefill_npz = f"{output_dir}/selected_prefill_interpolation/raw_data.npz"
+    decode_npz = f"{output_dir}/selected_decode_interpolation/raw_data.npz"
+
+    config_map_obj: Optional[dict] = None
+    try:
+        with np.load(prefill_npz) as p_raw:
+            prefill_json = {
+                "prefill_isl": p_raw["prefill_isl"].tolist(),
+                "prefill_ttft": p_raw["prefill_ttft"].tolist(),
+                "prefill_thpt_per_gpu": p_raw["prefill_thpt_per_gpu"].tolist(),
+            }
+    except FileNotFoundError:
+        prefill_json = None
+
+    try:
+        with np.load(decode_npz) as d_raw:
+            # max_kv_tokens saved as array; convert to int
+            max_kv_tokens = d_raw["max_kv_tokens"]
+            if hasattr(max_kv_tokens, "tolist"):
+                max_kv_tokens_val = max_kv_tokens.tolist()
+                # Handle [value] vs value
+                if isinstance(max_kv_tokens_val, list):
+                    max_kv_tokens_val = (
+                        int(max_kv_tokens_val[0]) if max_kv_tokens_val else 0
+                    )
+                else:
+                    max_kv_tokens_val = int(max_kv_tokens_val)
+            else:
+                max_kv_tokens_val = int(max_kv_tokens)
+
+            decode_json = {
+                "x_kv_usage": d_raw["x_kv_usage"].tolist(),
+                "y_context_length": d_raw["y_context_length"].tolist(),
+                "z_itl": d_raw["z_itl"].tolist(),
+                "z_thpt_per_gpu": d_raw["z_thpt_per_gpu"].tolist(),
+                "max_kv_tokens": max_kv_tokens_val,
+            }
+    except FileNotFoundError:
+        decode_json = None
+
+    if prefill_json is not None and decode_json is not None:
+        config_map_obj = {
+            "apiVersion": "v1",
+            "kind": "ConfigMap",
+            "metadata": {"name": "planner-profile-data"},
+            "data": {
+                "prefill_raw_data.json": json.dumps(prefill_json),
+                "decode_raw_data.json": json.dumps(decode_json),
+            },
+        }
+
+        # Attach the ConfigMap as a volume in the Planner service
+        planner_volumes = planner_dict.setdefault("extraPodSpec", {}).setdefault(
+            "volumes", []
+        )
+        planner_volumes.append(
+            {
+                "name": "planner-profile-data",
+                "configMap": {"name": "planner-profile-data"},
+            }
+        )
+        mc_dict = planner_dict.setdefault("extraPodSpec", {}).setdefault(
+            "mainContainer", {}
+        )
+        mc_mounts = mc_dict.setdefault("volumeMounts", [])
+        mc_mounts.append(
+            {
+                "name": "planner-profile-data",
+                "mountPath": cm_mount_path,
+                "readOnly": True,
+            }
+        )
+
+    # Finalize DGD services
+    config_dict["spec"]["services"]["Planner"] = planner_dict
+
+    # Return multi-doc YAML (ConfigMap + DGD) when ConfigMap is created; else DGD only
+    if config_map_obj is not None:
+        return [config_map_obj, config_dict]
+    return config_dict
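
Inside the planner pod, each key of the planner-profile-data ConfigMap surfaces as a file under the mount path that is also passed via --profile-results-dir. The reader below is only an illustrative sketch (the planner's actual loading code is not part of this diff), assuming the workspace resolves to /workspace:

import json
from pathlib import Path

import numpy as np

# Hypothetical resolved mount path; the generated DGD uses f"{get_workspace_dir()}/profiling_results".
results_dir = Path("/workspace/profiling_results")

# ConfigMap keys become files named after the keys.
prefill = json.loads((results_dir / "prefill_raw_data.json").read_text())
decode = json.loads((results_dir / "decode_raw_data.json").read_text())

# The JSON lists convert straight back to numpy arrays for interpolation.
prefill_isl = np.asarray(prefill["prefill_isl"])
prefill_ttft = np.asarray(prefill["prefill_ttft"])
x_kv_usage = np.asarray(decode["x_kv_usage"])
z_itl = np.asarray(decode["z_itl"])
max_kv_tokens = int(decode["max_kv_tokens"])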
