
Commit 55124f8

Add cli run batch (#4237)

* feat(log): add_request_and_response_log
* [cli] add run batch cli

Co-authored-by: Jiang-Jia-Jun <[email protected]>
1 parent 8a96432 commit 55124f8

9 files changed: +2446 −0 lines changed


fastdeploy/entrypoints/cli/main.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -23,10 +23,12 @@
 def main():
     import fastdeploy.entrypoints.cli.benchmark.main
     import fastdeploy.entrypoints.cli.openai
+    import fastdeploy.entrypoints.cli.run_batch
     import fastdeploy.entrypoints.cli.serve
     from fastdeploy.utils import FlexibleArgumentParser

     CMD_MODULES = [
+        fastdeploy.entrypoints.cli.run_batch,
         fastdeploy.entrypoints.cli.openai,
         fastdeploy.entrypoints.cli.benchmark.main,
         fastdeploy.entrypoints.cli.serve,
```
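The diff only shows the registration list. As a rough sketch (not code from this commit), each module in `CMD_MODULES` is presumably consumed via the `cmd_init()`/`subparser_init()` contract visible in `run_batch.py` below: `cmd_init()` yields `CLISubcommand` objects whose `subparser_init` hooks them into the top-level parser.

```python
# Hypothetical helper (not part of this commit) showing how CMD_MODULES is
# presumably consumed, based on the cmd_init()/subparser_init() contract
# in the new run_batch.py file below.
import argparse


def dispatch(cmd_modules: list, parser: argparse.ArgumentParser) -> None:
    subparsers = parser.add_subparsers(dest="subcommand")
    cmds = {}
    for module in cmd_modules:
        for cmd in module.cmd_init():        # e.g. [RunBatchSubcommand()]
            cmd.subparser_init(subparsers)   # registers "run-batch", etc.
            cmds[cmd.name] = cmd
    args = parser.parse_args()
    if args.subcommand in cmds:
        cmds[args.subcommand].cmd(args)      # hand off to the subcommand
```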
fastdeploy/entrypoints/cli/run_batch.py

Lines changed: 65 additions & 0 deletions (new file)
@@ -0,0 +1,65 @@
1+
"""
2+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""
16+
17+
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/run_batch.py
18+
19+
from __future__ import annotations
20+
21+
import argparse
22+
import asyncio
23+
import importlib.metadata
24+
25+
from fastdeploy.entrypoints.cli.types import CLISubcommand
26+
from fastdeploy.utils import (
27+
FASTDEPLOY_SUBCMD_PARSER_EPILOG,
28+
FlexibleArgumentParser,
29+
show_filtered_argument_or_group_from_help,
30+
)
31+
32+
33+
class RunBatchSubcommand(CLISubcommand):
34+
"""The `run-batch` subcommand for FastDeploy CLI."""
35+
36+
name = "run-batch"
37+
38+
@staticmethod
39+
def cmd(args: argparse.Namespace) -> None:
40+
from fastdeploy.entrypoints.openai.run_batch import main as run_batch_main
41+
42+
print("FastDeploy batch processing API version", importlib.metadata.version("fastdeploy-gpu"))
43+
print(args)
44+
asyncio.run(run_batch_main(args))
45+
46+
def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
47+
from fastdeploy.entrypoints.openai.run_batch import make_arg_parser
48+
49+
run_batch_parser = subparsers.add_parser(
50+
"run-batch",
51+
help="Run batch prompts and write results to file.",
52+
description=(
53+
"Run batch prompts using FastDeploy's OpenAI-compatible API.\n"
54+
"Supports local or HTTP input/output files."
55+
),
56+
usage="FastDeploy run-batch -i INPUT.jsonl -o OUTPUT.jsonl --model <model>",
57+
)
58+
run_batch_parser = make_arg_parser(run_batch_parser)
59+
show_filtered_argument_or_group_from_help(run_batch_parser, ["run-batch"])
60+
run_batch_parser.epilog = FASTDEPLOY_SUBCMD_PARSER_EPILOG
61+
return run_batch_parser
62+
63+
64+
def cmd_init() -> list[CLISubcommand]:
65+
return [RunBatchSubcommand()]
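Taken together with the registration in main.py, an end-to-end invocation would look something like the sketch below. It assumes the installed console script is named `fastdeploy` (the usage string above capitalizes it as `FastDeploy`); the file names and model are placeholders, while the `-i`/`-o`/`--model` flags come from the usage string itself.

```python
# Hedged sketch: driving the new subcommand from Python. The executable
# name "fastdeploy" is an assumption; input/output paths and the model
# name are placeholders.
import subprocess

subprocess.run(
    [
        "fastdeploy", "run-batch",
        "-i", "batch_input.jsonl",   # one BatchRequestInput object per line
        "-o", "batch_output.jsonl",  # one BatchRequestOutput object per line
        "--model", "my-model",       # placeholder model name
    ],
    check=True,
)
```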

fastdeploy/entrypoints/openai/protocol.py

Lines changed: 68 additions & 0 deletions
```diff
@@ -720,3 +720,71 @@ class ControlSchedulerRequest(BaseModel):
     reset: Optional[bool] = False
     load_shards_num: Optional[int] = None
     reallocate_shard: Optional[bool] = False
+
+
+from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator
+
+BatchRequestInputBody = ChatCompletionRequest
+
+
+class BatchRequestInput(BaseModel):
+    """
+    The per-line object of the batch input file.
+
+    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
+    """
+
+    # A developer-provided per-request id that will be used to match outputs to
+    # inputs. Must be unique for each request in a batch.
+    custom_id: str
+
+    # The HTTP method to be used for the request. Currently only POST is
+    # supported.
+    method: str
+
+    # The OpenAI API relative URL to be used for the request. Currently
+    # /v1/chat/completions is supported.
+    url: str
+
+    # The parameters of the request.
+    body: BatchRequestInputBody
+
+    @field_validator("body", mode="before")
+    @classmethod
+    def check_type_for_url(cls, value: Any, info: ValidationInfo):
+        # Use url to disambiguate models
+        url: str = info.data["url"]
+        if url == "/v1/chat/completions":
+            if isinstance(value, dict):
+                return value
+            return ChatCompletionRequest.model_validate(value)
+        return value
+
+
+class BatchResponseData(BaseModel):
+    # HTTP status code of the response.
+    status_code: int = 200
+
+    # A unique identifier for the API request.
+    request_id: str
+
+    # The body of the response.
+    body: Optional[ChatCompletionResponse] = None
+
+
+class BatchRequestOutput(BaseModel):
+    """
+    The per-line object of the batch output and error files.
+    """
+
+    id: str
+
+    # A developer-provided per-request id that will be used to match outputs to
+    # inputs.
+    custom_id: str
+
+    response: Optional[BatchResponseData]
+
+    # For requests that failed with a non-HTTP error, this will contain more
+    # information on the cause of the failure.
+    error: Optional[Any]
```
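To make the batch protocol concrete, here is a minimal sketch of validating one input line against these models. The field names follow the diff above; the chat body is illustrative, and a real `ChatCompletionRequest` may require additional fields.

```python
# Hedged sketch: round-tripping one batch input line through the new
# pydantic models. The "body" payload is illustrative only.
from fastdeploy.entrypoints.openai.protocol import (
    BatchRequestInput,
    ChatCompletionRequest,
)

line = {
    "custom_id": "request-1",       # unique per request within the batch
    "method": "POST",               # only POST is supported
    "url": "/v1/chat/completions",  # only this endpoint is supported
    "body": {
        "model": "my-model",        # placeholder model name
        "messages": [{"role": "user", "content": "Hello!"}],
    },
}

req = BatchRequestInput.model_validate(line)
# For /v1/chat/completions, check_type_for_url passes the dict through,
# and pydantic then coerces it into the declared BatchRequestInputBody
# type (an alias for ChatCompletionRequest).
assert isinstance(req.body, ChatCompletionRequest)
```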
