
Commit 55124f8

Add cli run batch (#4237)

* feat(log): add_request_and_response_log
* [cli] add run batch cli

Co-authored-by: Jiang-Jia-Jun <[email protected]>
1 parent 8a96432 commit 55124f8

9 files changed: +2446 −0 lines changed


fastdeploy/entrypoints/cli/main.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -23,10 +23,12 @@
 def main():
     import fastdeploy.entrypoints.cli.benchmark.main
     import fastdeploy.entrypoints.cli.openai
+    import fastdeploy.entrypoints.cli.run_batch
     import fastdeploy.entrypoints.cli.serve
     from fastdeploy.utils import FlexibleArgumentParser

     CMD_MODULES = [
+        fastdeploy.entrypoints.cli.run_batch,
         fastdeploy.entrypoints.cli.openai,
         fastdeploy.entrypoints.cli.benchmark.main,
         fastdeploy.entrypoints.cli.serve,
```
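The diff only shows the registration list. As a rough sketch (not code from this commit), each module in `CMD_MODULES` is presumably consumed via the `cmd_init()`/`subparser_init()` contract visible in `run_batch.py` below: `cmd_init()` yields `CLISubcommand` objects whose `subparser_init` hooks them into the top-level parser.

```python
# Hypothetical helper (not part of this commit) showing how CMD_MODULES is
# presumably consumed, based on the cmd_init()/subparser_init() contract
# in the new run_batch.py file below.
import argparse


def dispatch(cmd_modules: list, parser: argparse.ArgumentParser) -> None:
    subparsers = parser.add_subparsers(dest="subcommand")
    cmds = {}
    for module in cmd_modules:
        for cmd in module.cmd_init():        # e.g. [RunBatchSubcommand()]
            cmd.subparser_init(subparsers)   # registers "run-batch", etc.
            cmds[cmd.name] = cmd
    args = parser.parse_args()
    if args.subcommand in cmds:
        cmds[args.subcommand].cmd(args)      # hand off to the subcommand
```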
fastdeploy/entrypoints/cli/run_batch.py

Lines changed: 65 additions & 0 deletions (new file)
@@ -0,0 +1,65 @@
1+
"""
2+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""
16+
17+
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/run_batch.py
18+
19+
from __future__ import annotations
20+
21+
import argparse
22+
import asyncio
23+
import importlib.metadata
24+
25+
from fastdeploy.entrypoints.cli.types import CLISubcommand
26+
from fastdeploy.utils import (
27+
FASTDEPLOY_SUBCMD_PARSER_EPILOG,
28+
FlexibleArgumentParser,
29+
show_filtered_argument_or_group_from_help,
30+
)
31+
32+
33+
class RunBatchSubcommand(CLISubcommand):
34+
"""The `run-batch` subcommand for FastDeploy CLI."""
35+
36+
name = "run-batch"
37+
38+
@staticmethod
39+
def cmd(args: argparse.Namespace) -> None:
40+
from fastdeploy.entrypoints.openai.run_batch import main as run_batch_main
41+
42+
print("FastDeploy batch processing API version", importlib.metadata.version("fastdeploy-gpu"))
43+
print(args)
44+
asyncio.run(run_batch_main(args))
45+
46+
def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
47+
from fastdeploy.entrypoints.openai.run_batch import make_arg_parser
48+
49+
run_batch_parser = subparsers.add_parser(
50+
"run-batch",
51+
help="Run batch prompts and write results to file.",
52+
description=(
53+
"Run batch prompts using FastDeploy's OpenAI-compatible API.\n"
54+
"Supports local or HTTP input/output files."
55+
),
56+
usage="FastDeploy run-batch -i INPUT.jsonl -o OUTPUT.jsonl --model <model>",
57+
)
58+
run_batch_parser = make_arg_parser(run_batch_parser)
59+
show_filtered_argument_or_group_from_help(run_batch_parser, ["run-batch"])
60+
run_batch_parser.epilog = FASTDEPLOY_SUBCMD_PARSER_EPILOG
61+
return run_batch_parser
62+
63+
64+
def cmd_init() -> list[CLISubcommand]:
65+
return [RunBatchSubcommand()]
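Taken together with the registration in main.py, an end-to-end invocation would look something like the sketch below. It assumes the installed console script is named `fastdeploy` (the usage string above capitalizes it as `FastDeploy`); the file names and model are placeholders, while the `-i`/`-o`/`--model` flags come from the usage string itself.

```python
# Hedged sketch: driving the new subcommand from Python. The executable
# name "fastdeploy" is an assumption; input/output paths and the model
# name are placeholders.
import subprocess

subprocess.run(
    [
        "fastdeploy", "run-batch",
        "-i", "batch_input.jsonl",   # one BatchRequestInput object per line
        "-o", "batch_output.jsonl",  # one BatchRequestOutput object per line
        "--model", "my-model",       # placeholder model name
    ],
    check=True,
)
```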

fastdeploy/entrypoints/openai/protocol.py

Lines changed: 68 additions & 0 deletions
```diff
@@ -720,3 +720,71 @@ class ControlSchedulerRequest(BaseModel):
     reset: Optional[bool] = False
     load_shards_num: Optional[int] = None
     reallocate_shard: Optional[bool] = False
+
+
+from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator
+
+BatchRequestInputBody = ChatCompletionRequest
+
+
+class BatchRequestInput(BaseModel):
+    """
+    The per-line object of the batch input file.
+
+    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
+    """
+
+    # A developer-provided per-request id that will be used to match outputs to
+    # inputs. Must be unique for each request in a batch.
+    custom_id: str
+
+    # The HTTP method to be used for the request. Currently only POST is
+    # supported.
+    method: str
+
+    # The OpenAI API relative URL to be used for the request. Currently
+    # /v1/chat/completions is supported.
+    url: str
+
+    # The parameters of the request.
+    body: BatchRequestInputBody
+
+    @field_validator("body", mode="before")
+    @classmethod
+    def check_type_for_url(cls, value: Any, info: ValidationInfo):
+        # Use url to disambiguate models
+        url: str = info.data["url"]
+        if url == "/v1/chat/completions":
+            if isinstance(value, dict):
+                return value
+            return ChatCompletionRequest.model_validate(value)
+        return value
+
+
+class BatchResponseData(BaseModel):
+    # HTTP status code of the response.
+    status_code: int = 200
+
+    # A unique identifier for the API request.
+    request_id: str
+
+    # The body of the response.
+    body: Optional[ChatCompletionResponse] = None
+
+
+class BatchRequestOutput(BaseModel):
+    """
+    The per-line object of the batch output and error files.
+    """
+
+    id: str
+
+    # A developer-provided per-request id that will be used to match outputs to
+    # inputs.
+    custom_id: str
+
+    response: Optional[BatchResponseData]
+
+    # For requests that failed with a non-HTTP error, this will contain more
+    # information on the cause of the failure.
+    error: Optional[Any]
```
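To make the batch protocol concrete, here is a minimal sketch of validating one input line against these models. The field names follow the diff above; the chat body is illustrative, and a real `ChatCompletionRequest` may require additional fields.

```python
# Hedged sketch: round-tripping one batch input line through the new
# pydantic models. The "body" payload is illustrative only.
from fastdeploy.entrypoints.openai.protocol import (
    BatchRequestInput,
    ChatCompletionRequest,
)

line = {
    "custom_id": "request-1",       # unique per request within the batch
    "method": "POST",               # only POST is supported
    "url": "/v1/chat/completions",  # only this endpoint is supported
    "body": {
        "model": "my-model",        # placeholder model name
        "messages": [{"role": "user", "content": "Hello!"}],
    },
}

req = BatchRequestInput.model_validate(line)
# For /v1/chat/completions, check_type_for_url passes the dict through,
# and pydantic then coerces it into the declared BatchRequestInputBody
# type (an alias for ChatCompletionRequest).
assert isinstance(req.body, ChatCompletionRequest)
```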
