55the ASGI server.
66"""
77
8+ from __future__ import annotations
9+
810import asyncio
911import sys
1012
1113import click
1214from loguru import logger
1315
1416from .config import MLXServerConfig
15- from .parsers import REASONING_PARSER_MAP , TOOL_PARSER_MAP , UNIFIED_PARSER_MAP
16- from .message_converters import MESSAGE_CONVERTER_MAP
1717from .main import start
18+ from .message_converters import MESSAGE_CONVERTER_MAP
19+ from .parsers import REASONING_PARSER_MAP , TOOL_PARSER_MAP , UNIFIED_PARSER_MAP
1820from .version import __version__
1921
2022
21- class UpperChoice (click .Choice ):
23+ class UpperChoice (click .Choice [ str ] ):
2224 """Case-insensitive choice type that returns uppercase values.
2325
2426 This small convenience subclass normalizes user input in a
@@ -27,7 +29,7 @@ class UpperChoice(click.Choice):
2729 where the internal representation is uppercased.
2830 """
2931
30- def normalize_choice (self , choice , ctx ):
32+ def normalize_choice (self , choice : str | None , ctx : click . Context | None ) -> str | None : # type: ignore[override]
3133 """Return the canonical uppercase choice or raise BadParameter.
3234
3335 Parameters
@@ -76,20 +78,19 @@ def normalize_choice(self, choice, ctx):
7678🚀 Version: %(version)s
7779""" ,
7880)
def cli() -> None:
    """Root Click command group for the MLX server command-line interface.

    Individual subcommands (for example ``launch``) attach themselves to
    this group; the console-script entry point dispatches through it. The
    group itself performs no work beyond hosting its subcommands.
    """
8687
8788
88- @cli .command ()
89+ @cli .command (help = "Start the MLX OpenAI Server with the supplied flags" )
8990@click .option (
9091 "--model-path" ,
9192 required = True ,
92- help = "Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types). With `image-generation` or `image-edit` model types, it should be the local path to the model ." ,
93+ help = "Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types). Can be a local path or Hugging Face repository ID (e.g., 'blackforestlabs/FLUX.1-dev') ." ,
9394)
9495@click .option (
9596 "--model-type" ,
@@ -121,7 +122,18 @@ def cli():
121122@click .option (
122123 "--config-name" ,
123124 default = None ,
124- type = click .Choice (["flux-schnell" , "flux-dev" , "flux-krea-dev" , "flux-kontext-dev" , "qwen-image" , "qwen-image-edit" , "z-image-turbo" , "fibo" ]),
125+ type = click .Choice (
126+ [
127+ "flux-schnell" ,
128+ "flux-dev" ,
129+ "flux-krea-dev" ,
130+ "flux-kontext-dev" ,
131+ "qwen-image" ,
132+ "qwen-image-edit" ,
133+ "z-image-turbo" ,
134+ "fibo" ,
135+ ]
136+ ),
125137 help = "Config name of the model. Only used for image-generation and image-edit models." ,
126138)
127139@click .option (
@@ -198,37 +210,79 @@ def cli():
198210 help = "Enable debug mode for language models. Only works with language models (lm) and multimodal models." ,
199211)
200212def launch (
201- model_path ,
202- model_type ,
203- context_length ,
204- port ,
205- host ,
206- max_concurrency ,
207- queue_timeout ,
208- queue_size ,
209- quantize ,
210- config_name ,
211- lora_paths ,
212- lora_scales ,
213- disable_auto_resize ,
214- log_file ,
215- no_log_file ,
216- log_level ,
217- enable_auto_tool_choice ,
218- tool_call_parser ,
219- reasoning_parser ,
220- message_converter ,
221- trust_remote_code ,
222- chat_template_file ,
223- debug ,
213+ model_path : str ,
214+ model_type : str ,
215+ context_length : int ,
216+ port : int ,
217+ host : str ,
218+ max_concurrency : int ,
219+ queue_timeout : int ,
220+ queue_size : int ,
221+ quantize : int ,
222+ config_name : str | None ,
223+ lora_paths : str | None ,
224+ lora_scales : str | None ,
225+ disable_auto_resize : bool ,
226+ log_file : str | None ,
227+ no_log_file : bool ,
228+ log_level : str ,
229+ enable_auto_tool_choice : bool ,
230+ tool_call_parser : str | None ,
231+ reasoning_parser : str | None ,
232+ message_converter : str | None ,
233+ trust_remote_code : bool ,
234+ chat_template_file : str | None ,
235+ debug : bool ,
224236) -> None :
225237 """Start the FastAPI/Uvicorn server with the supplied flags.
226238
227239 The command builds a server configuration object using
228240 ``MLXServerConfig`` and then calls the async ``start`` routine
229241 which handles the event loop and server lifecycle.
230- """
231242
243+ Parameters
244+ ----------
245+ model_path : str
246+ Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types).
247+ model_type : str
248+ Type of model to run (lm, multimodal, image-generation, image-edit, embeddings, whisper).
249+ context_length : int
250+ Context length for language models.
251+ port : int
252+ Port to run the server on.
253+ host : str
254+ Host to run the server on.
255+ max_concurrency : int
256+ Maximum number of concurrent requests.
257+ queue_timeout : int
258+ Request timeout in seconds.
259+ queue_size : int
260+ Maximum queue size for pending requests.
261+ quantize : int
262+ Quantization level for the model.
263+ config_name : str or None
264+ Config name of the model.
265+ lora_paths : str or None
266+ Path to the LoRA file(s).
267+ lora_scales : str or None
268+ Scale factor for the LoRA file(s).
269+ disable_auto_resize : bool
270+ Disable automatic model resizing.
271+ log_file : str or None
272+ Path to log file.
273+ no_log_file : bool
274+ Disable file logging entirely.
275+ log_level : str
276+ Set the logging level.
277+ enable_auto_tool_choice : bool
278+ Enable automatic tool choice.
279+ tool_call_parser : str or None
280+ Specify tool call parser to use.
281+ reasoning_parser : str or None
282+ Specify reasoning parser to use.
283+ trust_remote_code : bool
284+ Enable trust_remote_code when loading models.
285+ """
232286 args = MLXServerConfig (
233287 model_path = model_path ,
234288 model_type = model_type ,
0 commit comments