
Commit e3d7bad

CI Fixes (#3415)
This commit fixes most of the CI tests.
1 parent 889227d · commit e3d7bad
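Most of the diffs below are mechanical formatting fixes: a long function call or signature is split so that each argument sits on its own line and ends with a trailing comma, the layout that formatters such as black produce once a line exceeds the configured length (the exact formatter and line-length limit are assumptions; the commit message only says it fixes CI). A minimal sketch of the pattern, using hypothetical values rather than code from this repository:

# Hypothetical example only; these names and values are not from the commit.
# Before: every argument on one line, which can exceed a lint line-length limit.
endpoint = dict(host="127.0.0.1", port=8000, timeout=60.0, include_server_metrics=False, no_debug_config=True)

# After: one argument per line, each ending with a trailing comma -- the form used throughout this commit.
endpoint = dict(
    host="127.0.0.1",
    port=8000,
    timeout=60.0,
    include_server_metrics=False,
    no_debug_config=True,
)
print(endpoint)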

114 files changed: 1,161 additions & 389 deletions

Note: some diffs are hidden by default for large commits, so only a subset of the 114 changed files appears below.


3rdparty/tvm

Submodule tvm updated 458 files

python/mlc_llm/bench/api_endpoint.py

Lines changed: 5 additions & 1 deletion
@@ -451,7 +451,11 @@ def create_api_endpoint(args: argparse.Namespace) -> APIEndPoint:
         return OpenAIEndPoint(args.host, args.port, args.timeout, args.include_server_metrics)
     if args.api_endpoint == "vllm":
         return OpenAIEndPoint(
-            args.host, args.port, args.timeout, include_server_metrics=False, no_debug_config=True
+            args.host,
+            args.port,
+            args.timeout,
+            include_server_metrics=False,
+            no_debug_config=True,
         )
     if args.api_endpoint == "openai-chat":
         return OpenAIChatEndPoint(args.host, args.port, args.timeout, args.include_server_metrics)

python/mlc_llm/bench/dataset.py

Lines changed: 13 additions & 3 deletions
@@ -723,8 +723,16 @@ def __init__(self, dataset_path: str, tokenizer: AutoTokenizer) -> None:
         self.dataset = [
             (
                 entry["TIMESTAMP"],
-                min(entry["ContextTokens"], tokenizer.model_max_length, self.truncate_length),
-                min(entry["GeneratedTokens"], tokenizer.model_max_length, self.truncate_length),
+                min(
+                    entry["ContextTokens"],
+                    tokenizer.model_max_length,
+                    self.truncate_length,
+                ),
+                min(
+                    entry["GeneratedTokens"],
+                    tokenizer.model_max_length,
+                    self.truncate_length,
+                ),
             )
             for _, entry in df.iterrows()
             if entry["ContextTokens"] >= 4 and entry["GeneratedTokens"] >= 4
@@ -836,7 +844,9 @@ def create_dataset( # pylint: disable=too-many-return-statements,too-many-branc
             args.apply_chat_template is False
         ), "LLMPerf dataset does not support applying chat template"
         return LLMPerfDataset(
-            args.dataset_path, (args.num_requests + args.num_warmup_requests) * 4, tokenizer
+            args.dataset_path,
+            (args.num_requests + args.num_warmup_requests) * 4,
+            tokenizer,
         )
     if args.dataset == "json-mode-eval":
         assert (

python/mlc_llm/bench/request_processor.py

Lines changed: 3 additions & 1 deletion
@@ -601,7 +601,9 @@ async def _task(request_record: RequestRecord) -> None:
 
 
 def create_pipelines(  # pylint: disable=too-many-branches
-    args: argparse.Namespace, f_create_api_endpoint: Callable[[], APIEndPoint], dataset: Dataset
+    args: argparse.Namespace,
+    f_create_api_endpoint: Callable[[], APIEndPoint],
+    dataset: Dataset,
 ) -> List[RequestProcessor]:
     """Creating request processing pipelines with regard to the specified args."""
     cuda_profile_url = f"http://{args.host}:{args.port}" if args.cuda_profile else None

python/mlc_llm/bench/request_record.py

Lines changed: 10 additions & 2 deletions
@@ -113,7 +113,9 @@ def generate_metrics_summary(
     return report
 
 
-def _compute_metrics_statistics(metrics: List[Union[Metrics, ServerMetrics]]) -> Dict[str, Any]:
+def _compute_metrics_statistics(
+    metrics: List[Union[Metrics, ServerMetrics]],
+) -> Dict[str, Any]:
     """
     Compute the statistics of the metrics.
 
@@ -133,7 +135,13 @@ def _compute_metrics_statistics(metrics: List[Union[Metrics, ServerMetrics]]) ->
     report: Dict = {}
     df = pd.DataFrame([metric.model_dump() for metric in metrics])
     for key, _ in metrics[0].model_fields.items():
-        if key in ["success", "start_time", "finish_time", "server_metrics", "exec_feature"]:
+        if key in [
+            "success",
+            "start_time",
+            "finish_time",
+            "server_metrics",
+            "exec_feature",
+        ]:
             continue
         if key in df.columns:
             series = df[key].dropna()

python/mlc_llm/cli/calibrate.py

Lines changed: 9 additions & 2 deletions
@@ -28,12 +28,19 @@ def main(argv):
         help=HELP["model_lib"] + ' (default: "%(default)s")',
     )
     parser.add_argument(
-        "--output", "-o", type=str, required=True, help=HELP["output_calibration"] + " (required)"
+        "--output",
+        "-o",
+        type=str,
+        required=True,
+        help=HELP["output_calibration"] + " (required)",
     )
     # Download dataset from
     # https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
     parser.add_argument(
-        "--dataset", type=str, required=True, help=HELP["calibration_dataset"] + " (required)"
+        "--dataset",
+        type=str,
+        required=True,
+        help=HELP["calibration_dataset"] + " (required)",
     )
 
     parser.add_argument(

python/mlc_llm/cli/compile.py

Lines changed: 4 additions & 1 deletion
@@ -121,7 +121,10 @@ def _check_system_lib_prefix(prefix: str) -> str:
     parsed.model_type = detect_model_type(parsed.model_type, parsed.model)
     parsed.quantization = detect_quantization(parsed.quantization, parsed.model)
     parsed.system_lib_prefix = detect_system_lib_prefix(
-        parsed.device, parsed.system_lib_prefix, parsed.model_type.name, parsed.quantization.name
+        parsed.device,
+        parsed.system_lib_prefix,
+        parsed.model_type.name,
+        parsed.quantization.name,
     )
     with open(parsed.model, "r", encoding="utf-8") as config_file:
         config = json.load(config_file)

python/mlc_llm/cli/delivery.py

Lines changed: 16 additions & 3 deletions
@@ -172,7 +172,11 @@ def _run_quantization(
         ]
         print(" ".join(cmd), file=log_file, flush=True)
         subprocess.run(
-            cmd, check=False, stdout=log_file, stderr=subprocess.STDOUT, env=os.environ
+            cmd,
+            check=False,
+            stdout=log_file,
+            stderr=subprocess.STDOUT,
+            env=os.environ,
         )
     logger.info("[MLC] Complete!")
     if not (Path(output_dir) / "tensor-cache.json").exists() and not model_info.gen_config_only:
@@ -225,7 +229,13 @@ def _generate_model_delivery_diff( # pylint: disable=too-many-locals
         quantization = task.quantization
         overrides = {**default_overrides, **task.overrides}
 
-        logger.info("Checking task: %s %s %s %s", model_id, conv_template, quantization, overrides)
+        logger.info(
+            "Checking task: %s %s %s %s",
+            model_id,
+            conv_template,
+            quantization,
+            overrides,
+        )
         log_tasks = [t for t in log.tasks if t.model_id == model_id]
         delivered_quantizations = set()
         gen_config_only = set()
@@ -260,7 +270,10 @@ def _generate_model_delivery_diff( # pylint: disable=too-many-locals
     diff_config.default_overrides = {}
    diff_config.tasks = diff_tasks
 
-    logger.info("Model delivery diff: %s", diff_config.model_dump_json(indent=4, exclude_none=True))
+    logger.info(
+        "Model delivery diff: %s",
+        diff_config.model_dump_json(indent=4, exclude_none=True),
+    )
 
     return diff_config
 

python/mlc_llm/cli/disco_remote_socket_session.py

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 """Internal remote disco socket session."""
+
 import sys
 
 from tvm import runtime as _  # pylint: disable=unused-import

python/mlc_llm/cli/serve.py

Lines changed: 5 additions & 1 deletion
@@ -53,7 +53,11 @@ def __repr__(self) -> str:
         print(f";sliding_window_size={self.sliding_window_size}", file=out, end="")
         print(f";attention_sink_size={self.attention_sink_size}", file=out, end="")
         print(f";tensor_parallel_shards={self.tensor_parallel_shards}", file=out, end="")
-        print(f";pipeline_parallel_stages={self.pipeline_parallel_stages}", file=out, end="")
+        print(
+            f";pipeline_parallel_stages={self.pipeline_parallel_stages}",
+            file=out,
+            end="",
+        )
         print(f";opt={self.opt}", file=out, end="")
         return out.getvalue().rstrip()
