Skip to content

Commit 05ed741

Browse files
authored
feat: update the base operator package to RC2. (jd-opensource#103)
1 parent 905963f commit 05ed741

File tree

18 files changed

+154
-74
lines changed

18 files changed

+154
-74
lines changed

CMakeLists.txt

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,56 @@ if(USE_NPU)
2121
set(USE_A2 ON)
2222
message(STATUS "Building for device: A2 (macro USE_A2 defined)")
2323
endif()
24-
if(USE_A2)
25-
execute_process(
26-
COMMAND git -C "${CMAKE_SOURCE_DIR}/third_party/xllm_ops" rev-parse HEAD
27-
OUTPUT_VARIABLE XLLM_OPS_GIT_HEAD
28-
OUTPUT_STRIP_TRAILING_WHITESPACE
29-
ERROR_QUIET
30-
)
3124

32-
if(NOT DEFINED XLLM_OPS_GIT_HEAD_CACHED OR NOT XLLM_OPS_GIT_HEAD STREQUAL XLLM_OPS_GIT_HEAD_CACHED)
33-
message(STATUS "xllm_ops git HEAD changed; running precompile via execute_process")
34-
execute_process(
35-
COMMAND bash ${CMAKE_SOURCE_DIR}/third_party/xllm_ops/build.sh
36-
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/third_party/xllm_ops
37-
RESULT_VARIABLE XLLM_OPS_RESULT
38-
)
39-
if(NOT XLLM_OPS_RESULT EQUAL 0)
40-
message(FATAL_ERROR "Failed to precompile xllm ops, error code: ${XLLM_OPS_RESULT}")
25+
option(INSTALL_XLLM_KERNELS "Install xllm_kernels RPM" ON)
26+
message(STATUS "INSTALL_XLLM_KERNELS enabled: ${INSTALL_XLLM_KERNELS}")
27+
if(INSTALL_XLLM_KERNELS)
28+
if(DEVICE_TYPE STREQUAL "USE_A3")
29+
message("downloading a3 arm xllm kernels")
30+
file(DOWNLOAD
31+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.0.0-Linux.a3.arm.rpm"
32+
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
33+
)
34+
else()
35+
message("downloading a2 arm xllm_kernels")
36+
if(DEVICE_ARCH STREQUAL "ARM")
37+
file(DOWNLOAD
38+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.2.0-Linux.a2.arm.rpm"
39+
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
40+
)
41+
else()
42+
# Non-ARM A2 branch: the package fetched below is the a2.x86 RPM.
message("downloading a2 x86 xllm_kernels")
43+
file(DOWNLOAD
44+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.2.0-Linux.a2.x86.rpm"
45+
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
46+
)
4147
endif()
42-
set(XLLM_OPS_GIT_HEAD_CACHED "${XLLM_OPS_GIT_HEAD}" CACHE INTERNAL "" FORCE)
43-
message(STATUS "xllm ops precompiled and HEAD cache updated")
44-
else()
45-
message(STATUS "xllm_ops git HEAD unchanged; skipping precompile")
4648
endif()
49+
execute_process(COMMAND rpm -ivh "${CMAKE_BINARY_DIR}/xllm_kernels.rpm")
50+
file(WRITE "${CMAKE_BINARY_DIR}/.xllm_installed" "")
51+
endif()
52+
53+
execute_process(
54+
COMMAND git -C "${CMAKE_SOURCE_DIR}/third_party/xllm_ops" rev-parse HEAD
55+
OUTPUT_VARIABLE XLLM_OPS_GIT_HEAD
56+
OUTPUT_STRIP_TRAILING_WHITESPACE
57+
ERROR_QUIET
58+
)
59+
60+
if(NOT DEFINED XLLM_OPS_GIT_HEAD_CACHED OR NOT XLLM_OPS_GIT_HEAD STREQUAL XLLM_OPS_GIT_HEAD_CACHED)
61+
message(STATUS "xllm_ops git HEAD changed; running precompile via execute_process")
62+
execute_process(
63+
COMMAND bash ${CMAKE_SOURCE_DIR}/third_party/xllm_ops/build.sh
64+
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/third_party/xllm_ops
65+
RESULT_VARIABLE XLLM_OPS_RESULT
66+
)
67+
if(NOT XLLM_OPS_RESULT EQUAL 0)
68+
message(FATAL_ERROR "Failed to precompile xllm ops, error code: ${XLLM_OPS_RESULT}")
69+
endif()
70+
set(XLLM_OPS_GIT_HEAD_CACHED "${XLLM_OPS_GIT_HEAD}" CACHE INTERNAL "" FORCE)
71+
message(STATUS "xllm ops precompiled and HEAD cache updated")
72+
else()
73+
message(STATUS "xllm_ops git HEAD unchanged; skipping precompile")
4774
endif()
4875
endif()
4976

docs/en/getting_started/compile.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ pip install --upgrade setuptools wheel
4141
```
4242

4343
## Compilation
44-
Execute the compilation to generate the executable file `build/xllm/core/server/xllm` under `build/`:
44+
Execute the compilation to generate the executable file `build/xllm/core/server/xllm` under `build/`. The default target is the x86 architecture with an A2 device; for ARM, add `--arch arm`, and for A3, add `--device a3`:
4545
```bash
4646
python setup.py build
4747
```

docs/zh/getting_started/compile.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ pip install -r cibuild/requirements-dev.txt -i https://mirrors.tuna.tsinghua.edu
4141
pip install --upgrade setuptools wheel
4242
```
4343
## 编译
44-
执行编译,在`build/`下生成可执行文件`build/xllm/core/server/xllm`
44+
执行编译,在`build/`下生成可执行文件`build/xllm/core/server/xllm`。默认为 x86 架构、A2 设备;ARM 请加 `--arch arm`,A3 请加 `--device a3`:
4545
```bash
4646
python setup.py build
4747
```

setup.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def set_npu_envs():
109109
os.environ["PYTHON_INCLUDE_PATH"] = get_python_include_path()
110110
os.environ["PYTHON_LIB_PATH"] = get_torch_root_path()
111111
os.environ["LIBTORCH_ROOT"] = get_torch_root_path()
112-
112+
os.environ["INSTALL_XLLM_KERNELS"] = "ON" if install_kernels else "OFF"
113113
NPU_TOOLKIT_HOME = os.getenv("NPU_TOOLKIT_HOME")
114114
if not NPU_TOOLKIT_HOME:
115115
os.environ["NPU_TOOLKIT_HOME"] = "/usr/local/Ascend/ascend-toolkit/latest"
@@ -204,13 +204,15 @@ class ExtBuild(build_ext):
204204
("base-dir=", None, "base directory of xLLM project"),
205205
("device=", None, "target device type (a3 or a2 or mlu)"),
206206
("arch=", None, "target arch type (x86 or arm)"),
207+
("install-xllm-kernels=", None, "install xllm_kernels RPM package (true/false)"),
207208
]
208209

209210
def initialize_options(self):
210211
build_ext.initialize_options(self)
211212
self.base_dir = get_base_dir()
212213
self.device = "a2"
213214
self.arch = "x86"
215+
self.install_xllm_kernels = "true"
214216

215217
def finalize_options(self):
216218
build_ext.finalize_options(self)
@@ -278,6 +280,7 @@ def build_extension(self, ext: CMakeExtension):
278280
f"-DBUILD_SHARED_LIBS=OFF",
279281
f"-DDEVICE_TYPE=USE_{self.device.upper()}",
280282
f"-DDEVICE_ARCH={self.arch.upper()}",
283+
# install_xllm_kernels may be a bool (passed through setup(options=...)) or a
# string (the "true" default, or a value such as "--install-xllm-kernels=false").
# Any non-empty string is truthy, so normalize the value explicitly instead of
# relying on truthiness; otherwise "false" would still produce ON.
f"-DINSTALL_XLLM_KERNELS={'ON' if str(self.install_xllm_kernels).strip().lower() in ('true', '1', 'yes', 'y', 'on') else 'OFF'}",
281284
]
282285

283286
if self.device == "a2" or self.device == "a3":
@@ -346,6 +349,7 @@ def initialize_options(self):
346349
super().initialize_options()
347350
self.device = None
348351
self.arch = None
352+
self.install_xllm_kernels = "true"
349353

350354
def finalize_options(self):
351355
super().finalize_options()
@@ -512,6 +516,7 @@ def apply_patch():
512516
if __name__ == "__main__":
513517
device = 'a2' # default
514518
arch = "x86" # default
519+
install_kernels = True
515520
if '--device' in sys.argv:
516521
idx = sys.argv.index('--device')
517522
if idx + 1 < len(sys.argv):
@@ -536,7 +541,19 @@ def apply_patch():
536541
apply_patch()
537542
else:
538543
sys.argv.remove("--dry_run")
539-
544+
if '--install-xllm-kernels' in sys.argv:
545+
idx = sys.argv.index('--install-xllm-kernels')
546+
if idx + 1 < len(sys.argv):
547+
install_kernels = sys.argv[idx+1].lower()
548+
if install_kernels in ('true', '1', 'yes', 'y', 'on'):
549+
install_kernels = True
550+
elif install_kernels in ('false', '0', 'no', 'n', 'off'):
551+
install_kernels = False
552+
else:
553+
print("Error: --install-xllm-kernels must be true or false")
554+
sys.exit(1)
555+
sys.argv.pop(idx)
556+
sys.argv.pop(idx)
540557

541558
version = get_version()
542559

@@ -577,7 +594,12 @@ def apply_patch():
577594
cmdclass={"build_ext": ExtBuild,
578595
"test": TestUT,
579596
'bdist_wheel': BuildDistWheel},
580-
options={'build_ext': {'device': device,'arch': arch}},
597+
options={'build_ext': {
598+
'device': device,
599+
'arch': arch,
600+
'install_xllm_kernels': install_kernels if install_kernels is not None else "false"
601+
}
602+
},
581603
zip_safe=False,
582604
py_modules=["xllm/launch_xllm", "xllm/__init__",
583605
"xllm/pybind/llm", "xllm/pybind/args"],

xllm/core/distributed_runtime/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
include(cc_library)
22

3+
if(USE_NPU)
4+
include_directories(
5+
${CMAKE_SOURCE_DIR}/third_party/spdlog/include
6+
)
7+
endif()
8+
39
cc_library(
410
NAME
511
distributed_runtime

xllm/core/distributed_runtime/worker_server.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,18 @@ void WorkerServer::create_server(const runtime::Options& options,
131131
#if defined(USE_NPU)
132132
atb_speed::base::Mapping mapping;
133133
mapping.ParseParam(mapping_data);
134+
#if defined(USE_A3)
135+
mapping.InitGlobalCommDomain(FLAGS_communication_backend);
136+
#else
134137
mapping.InitCommDomain(FLAGS_communication_backend);
135-
138+
#endif
136139
auto moeEpParallelInfo = mapping.Get(atb_speed::base::MOE_EP);
137140
auto dispatchAndCombinecommDomain =
138141
atb_speed::GetSingleton<atb_speed::ExternalCommManager>().GetCommDomain(
139142
moeEpParallelInfo.groupId,
140143
moeEpParallelInfo.rankIds,
141144
moeEpParallelInfo.rank,
142-
moeEpParallelInfo.backend,
145+
FLAGS_communication_backend,
143146
moeEpParallelInfo.bufferSize,
144147
false);
145148
auto dispatchAndCombineHcclComm =

xllm/core/framework/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@ include(cc_library)
22
include(cc_test)
33

44
include_directories(.)
5-
5+
if(USE_NPU)
6+
include_directories(
7+
${CMAKE_SOURCE_DIR}/third_party/spdlog/include
8+
)
9+
endif()
610
add_subdirectory(batch)
711
add_subdirectory(block)
812
add_subdirectory(chat_template)
@@ -27,6 +31,7 @@ cc_library(
2731
DEPS
2832
:common
2933
torch
34+
hccl
3035
glog::glog
3136
)
3237

xllm/core/framework/mapping_npu.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ MappingNPU::MappingNPU(const std::string rank_table_file,
5050
get_tp_group(lm_head_tp_);
5151
get_dp_group(lm_head_dp_);
5252
get_tp_group(attn_inner_sp_);
53+
get_tp_group(attn_cp_);
5354

55+
attn_cp_.group_size_ = 1;
5456
// o_proj mixture of tp and dp
5557
if (ENV_enable_extra_o_proj_tp) {
5658
get_domain(attn_o_proj_tp_, attn_o_proj_dp_, 0);
@@ -343,6 +345,7 @@ nlohmann::json MappingNPU::to_json() {
343345
data["lmHeadTp"] = lmhead_tp;
344346
data["lmHeadDp"] = lmhead_dp;
345347
data["lcocAttnTp"] = attn_tp_.to_json();
348+
data["attnCp"] = attn_cp_.to_json();
346349

347350
return data;
348351
}

xllm/core/framework/mapping_npu.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ struct ParallelInfo {
5050

5151
nlohmann::json to_json() {
5252
nlohmann::json data;
53-
// data["group_size"] = group_size_;
53+
data["group_size"] = group_size_;
5454
// data["num_group"] = num_group_;
5555
data["rankIds"] = rank_per_group_[current_group_id_];
5656
data["groupId"] = current_group_id_;
@@ -149,6 +149,8 @@ class MappingNPU final {
149149
ParallelInfo lm_head_tp_ = ParallelInfo();
150150
ParallelInfo lm_head_dp_ = ParallelInfo();
151151
ParallelInfo attn_inner_sp_ = ParallelInfo();
152+
ParallelInfo attn_cp_ = ParallelInfo();
153+
152154
int32_t lccl_comm_domain_lower_bound_;
153155
int32_t lccl_comm_domain_upper_bound_;
154156
};

xllm/core/layers/npu/atb_head_impl.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,17 @@ void AtbLmHeadImpl::param_from_args(atb_speed::common::LmHeadParam& param,
5959
param.linearParallelParam.tensorParallelInfo.worldSize =
6060
parallelInfo.rankIds.size();
6161
param.linearParallelParam.tensorParallelInfo.backend =
62-
parallelInfo.backend;
62+
FLAGS_communication_backend;
63+
#if defined(USE_A3)
64+
parallelInfo.InitCommDomain(
65+
param.linearParallelParam.tensorParallelInfo.hcommInfo,
66+
param.linearParallelParam.tensorParallelInfo.commDomain);
67+
#else
6368
param.linearParallelParam.tensorParallelInfo.hcommInfo =
6469
parallelInfo.hcclComm;
6570
param.linearParallelParam.tensorParallelInfo.commDomain =
6671
parallelInfo.commDomain;
72+
#endif
6773
}
6874
}
6975
}

0 commit comments

Comments
 (0)