
Commit 9cc499b

XuZhang99 authored and yq33victor committed
feat: choose model backend automatically.
1 parent 47bcce5 commit 9cc499b

11 files changed (+57 −38 lines)


README.md

Lines changed: 0 additions & 1 deletion
@@ -167,7 +167,6 @@ Run the following command to start xLLM engine:
 ```bash
 ./build/xllm/core/server/xllm \ # launch xllm server
 --model=/path/to/your/llm \ # model path(to replace with your own path)
---backend=llm \ # indicate the LLM backend
 --port=9977 \ # set service port to 9977
 --max_memory_utilization 0.90 # set the maximal utilization of device memory
 ```

README_zh.md

Lines changed: 0 additions & 1 deletion
@@ -168,7 +168,6 @@ python setup.py bdist_wheel
 ```bash
 ./build/xllm/core/server/xllm \ # 启动 xllm 服务器程序
 --model=/path/to/your/llm \ # 指定模型路径(需替换为实际路径)
---backend=llm \ # 指定后端类型为 LLM
 --port=9977 \ # 设置服务端口为 9977
 --max_memory_utilization 0.90 # 设置最大内存利用率为 90
 ```

docs/en/getting_started/single_node.md

Lines changed: 0 additions & 1 deletion
@@ -4,7 +4,6 @@ Start the single-node `xllm` service directly:
 ```bash linenums="1"
 ./build/xllm/core/server/xllm \
 --model=/path/to/your/qwen2-7b \
---backend=llm \
 --port=9977 \
 --max_memory_utilization 0.90
 ```

docs/en/getting_started/start_vlm_service.md

Lines changed: 2 additions & 2 deletions
@@ -5,11 +5,11 @@ This document describes how to start a VLM model service based on the xLLM infer
 ## Single Device
 Start the service by executing the following command in the main directory of the `xllm` project:
 ```bash
-ASCEND_RT_VISIBLE_DEVICES=0 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90 --backend vlm
+ASCEND_RT_VISIBLE_DEVICES=0 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90
 ```

 ## Multiple Devices
 Start the service by executing the following command in the main directory of the `xllm` project:
 ```bash
-ASCEND_RT_VISIBLE_DEVICES=0,1 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90 --backend vlm
+ASCEND_RT_VISIBLE_DEVICES=0,1 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90
 ```

docs/zh/cli_reference.md

Lines changed: 0 additions & 1 deletion
@@ -26,7 +26,6 @@ xLLM使用gflags来管理服务启动参数,具体的参数含义如下:
 | `enable_prefix_cache` | bool | true | false | 是否开启prefix cache(DeepSeek暂不支持) | |
 | `communication_backend` | string | "hccl" | "lccl" | 通信操作采用的后端 | |
 | `block_size` | int32 | 128 | | KV Cache存储的block size大小 | |
-| `backend` | string | "llm" | "vlm" | 模型类型 | |
 | `task` | string | "generate" | "embed" | 服务类型,生成式或embedding | |
 | `max_cache_size` | int64 | 0 | | 可使用的KV Cache大小,单位byte | |

docs/zh/getting_started/single_node.md

Lines changed: 0 additions & 1 deletion
@@ -3,7 +3,6 @@
 ```bash linenums="1"
 ./build/xllm/core/server/xllm \
 --model=/path/to/your/qwen2-7b \
---backend=llm \
 --port=9977 \
 --max_memory_utilization 0.90
 ```

docs/zh/getting_started/start_vlm_service.md

Lines changed: 2 additions & 2 deletions
@@ -4,11 +4,11 @@
 ## 单卡
 启动服务,在`xllm`工程主目录中执行下面命令:
 ```bash
-ASCEND_RT_VISIBLE_DEVICES=0 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90 --backend vlm --devices auto
+ASCEND_RT_VISIBLE_DEVICES=0 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90 --devices auto
 ```

 ## 多卡
 启动服务,在`xllm`工程主目录中执行下面命令:
 ```bash
-ASCEND_RT_VISIBLE_DEVICES=0,1 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90 --backend vlm --devices auto
+ASCEND_RT_VISIBLE_DEVICES=0,1 ./build/xllm/core/server/xllm --model=/path/to/Qwen2.5-VL-7B-Instruct --port=12345 --max_memory_utilization 0.90 --devices auto
 ```

xllm/core/common/global_flags.cpp

Lines changed: 5 additions & 4 deletions
@@ -56,10 +56,11 @@ DEFINE_string(model_id, "", "hf model name.");
 
 DEFINE_string(model, "", "Name or path of the huggingface model to use.");
 
-DEFINE_string(backend,
-              "llm",
-              "Choose the backend model type. 'llm' for text-only, "
-              "'vlm' for multimodal (text and images).");
+DEFINE_string(
+    backend,
+    "",
+    "Choose the backend model type. 'llm' for text-only, "
+    "'vlm' for multimodal (text and images), 'dit' for diffusion models.");
 
 DEFINE_string(task,
               "generate",

xllm/models/model_registry.cpp

Lines changed: 9 additions & 0 deletions
@@ -71,6 +71,7 @@ void ModelRegistry::register_causallm_factory(const std::string& name,
                << " already registered.");
   } else {
     instance->model_registry_[name].causal_lm_factory = factory;
+    instance->model_backend_[name] = "llm";
   }
 }
 
@@ -83,6 +84,7 @@ void ModelRegistry::register_causalvlm_factory(const std::string& name,
                << " already registered.");
   } else {
     instance->model_registry_[name].causal_vlm_factory = factory;
+    instance->model_backend_[name] = "vlm";
   }
 }
 
@@ -95,6 +97,7 @@ void ModelRegistry::register_embeddinglm_factory(const std::string& name,
                << " already registered.");
   } else {
     instance->model_registry_[name].embedding_lm_factory = factory;
+    instance->model_backend_[name] = "llm";
   }
 }
 
@@ -107,6 +110,7 @@ void ModelRegistry::register_dit_model_factory(const std::string& name,
                << " already registered.");
   } else {
     instance->model_registry_[name].dit_model_factory = factory;
+    instance->model_backend_[name] = "dit";
   }
 }
 
@@ -229,6 +233,11 @@ TokenizerArgsLoader ModelRegistry::get_tokenizer_args_loader(
   return instance->model_registry_[name].tokenizer_args_loader;
 }
 
+std::string ModelRegistry::get_model_backend(const std::string& name) {
+  ModelRegistry* instance = get_instance();
+  return instance->model_backend_[name];
+}
+
 std::unique_ptr<CausalLM> create_llm_model(const ModelContext& context) {
   // get the factory function for the model type from model registry
   auto factory = ModelRegistry::get_causallm_factory(
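
A small usage sketch of the new accessor, for illustration only; the model names in the comment are hypothetical registration keys, not values taken from this diff.

```cpp
#include <iostream>
#include <string>

#include "xllm/models/model_registry.h"  // assumed include path

// Prints the backend recorded for a registered architecture name
// (e.g. a hypothetical "qwen2" -> "llm", "qwen2_5_vl" -> "vlm").
void print_backend(const std::string& model_type) {
  // Returns "llm", "vlm", or "dit" depending on which register_*_factory call
  // tagged the name; an unregistered name yields an empty string because the
  // lookup uses unordered_map::operator[].
  std::cout << model_type << " -> "
            << ModelRegistry::get_model_backend(model_type) << std::endl;
}
```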

xllm/models/model_registry.h

Lines changed: 3 additions & 0 deletions
@@ -126,8 +126,11 @@ class ModelRegistry {
   static ImageProcessorFactory get_image_processor_factory(
       const std::string& name);
 
+  static std::string get_model_backend(const std::string& name);
+
  private:
   std::unordered_map<std::string, ModelMeta> model_registry_;
+  std::unordered_map<std::string, std::string> model_backend_;
 };
 
 std::unique_ptr<CausalLM> create_llm_model(const ModelContext& context);
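
On the consumer side, a hedged sketch of how engine setup might branch on the recorded backend; only `create_llm_model` and `get_model_backend` are declared in this header, so the wrapper name and the omitted branches are assumptions.

```cpp
#include <memory>
#include <string>

#include "xllm/models/model_registry.h"  // assumed include path

// Hypothetical dispatch on the registered backend tag (sketch, not this commit's code).
std::unique_ptr<CausalLM> create_model_for(const ModelContext& context,
                                           const std::string& model_type) {
  const std::string backend = ModelRegistry::get_model_backend(model_type);
  if (backend == "llm") {
    return create_llm_model(context);  // declared in model_registry.h
  }
  // "vlm" and "dit" would route to their own creation paths; those factories
  // are not shown in this diff, so they are left out of the sketch.
  return nullptr;
}
```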
