Skip to content

Commit b0be217

Browse files
committed
refactor(xtask): 整理多模型服务的方案,支持通过配置文件配置模型
Signed-off-by: YdrMaster <ydrml@hotmail.com>
1 parent a154730 commit b0be217

File tree

7 files changed

+408
-408
lines changed

7 files changed

+408
-408
lines changed

Cargo.lock

Lines changed: 41 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,26 @@ cargo service --help
8989
```plaintext
9090
web service
9191
92-
Usage: xtask service [OPTIONS] --port <PORT> <MODEL>
92+
Usage: xtask service [OPTIONS] --port <PORT> <FILE>
9393
9494
Arguments:
95-
<MODEL>
95+
<FILE>
9696
9797
Options:
98+
-p, --port <PORT>
99+
--no-cuda-graph
100+
--name <NAME>
98101
--gpus <GPUS>
99102
--max-steps <MAX_STEPS>
100-
-p, --port <PORT>
103+
--think
101104
-h, --help
102105
```
106+
107+
通过 TOML 配置文件可以配置多模型服务。示例格式:
108+
109+
```toml
110+
[model-name]
111+
path = "model-path"
112+
think = true
113+
max-steps = 2048
114+
```

xtask/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ ratatui = "0.29"
1717

1818
serde.workspace = true
1919
serde_json = "1.0"
20+
toml = "0.8"
2021
tokio = { version = "1.45", features = ["rt-multi-thread", "net"] }
2122
hyper = { version = "1.6", features = ["http1", "server"] }
2223
hyper-util = { version = "0.1", features = ["http1", "tokio", "server"] }

xtask/src/main.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,23 +55,27 @@ struct BaseArgs {
5555

5656
impl BaseArgs {
5757
fn gpus(&self) -> Box<[c_int]> {
58-
self.gpus
59-
.as_ref()
60-
.map(|devices| {
61-
static NUM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\d+").unwrap());
62-
NUM_REGEX
63-
.find_iter(devices)
64-
.map(|c| c.as_str().parse().unwrap())
65-
.collect()
66-
})
67-
.unwrap_or_else(|| [0].into())
58+
parse_gpus(self.gpus.as_deref())
6859
}
6960

7061
fn max_steps(&self) -> usize {
7162
self.max_steps.unwrap_or(1000)
7263
}
7364
}
7465

66+
fn parse_gpus(config: Option<&str>) -> Box<[c_int]> {
67+
config
68+
.as_ref()
69+
.map(|devices| {
70+
static NUM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\d+").unwrap());
71+
NUM_REGEX
72+
.find_iter(devices)
73+
.map(|c| c.as_str().parse().unwrap())
74+
.collect()
75+
})
76+
.unwrap_or_else(|| [0].into())
77+
}
78+
7579
mod macros {
7680
macro_rules! print_now {
7781
($($arg:tt)*) => {{

xtask/src/service/client.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
use super::*;
1+
use super::V1_CHAT_COMPLETIONS;
22
use log::{info, trace, warn};
3-
use openai_struct::CreateChatCompletionStreamResponse;
3+
use openai_struct::{
4+
ChatCompletionRequestMessage, CreateChatCompletionRequest, CreateChatCompletionStreamResponse,
5+
};
46
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
57
use std::{env::VarError, time::Instant};
68
use tokio::time::Duration;

0 commit comments

Comments
 (0)