Skip to content

Commit a7c261b

Browse files
metax666, zhang-chenyi, duqimeng, StareAtYou, jxwangmetax
authored
[Metax] update metax CI CMakeLists and add warpctc_warprnn (#1979)
Co-authored-by: chezhang <[email protected]> Co-authored-by: duqimeng <[email protected]> Co-authored-by: Mingkun.Zhang <[email protected]> Co-authored-by: jiaxinWang-metax <[email protected]> Co-authored-by: MingkunZhang <[email protected]> Co-authored-by: zhang-chenyi <[email protected]> Co-authored-by: ZhouDuan <[email protected]>
1 parent 96d2518 commit a7c261b

File tree

140 files changed

+7225
-1111
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+7225
-1111
lines changed

.github/workflows/metax_work.yaml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
name: padlle metax gpu test
2+
3+
on:
4+
workflow_dispatch:
5+
pull_request:
6+
types: [opened, synchronize]
7+
branches: [develop, release/**]
8+
paths:
9+
- "**"
10+
- "!backends/**"
11+
- "backends/metax_gpu/**"
12+
13+
permissions: read-all
14+
15+
defaults:
16+
run:
17+
shell: bash
18+
19+
jobs:
20+
metax-gpu-test:
21+
runs-on: paddle-metax-runner-set
22+
# runs-on: debug-paddle-runner-set
23+
steps:
24+
- name: Checkout repository
25+
run: |
26+
git config --global user.name "GitHub Actions"
27+
git config --global user.email "[email protected]"
28+
29+
git clone \
30+
--reference-if-able /home/runner/PaddleCustomDevice \
31+
--depth=1 \
32+
--shallow-submodules \
33+
--jobs=8 \
34+
--branch ${{ github.base_ref || github.ref_name}} \
35+
--recurse-submodules \
36+
https://${{ github.actor }}:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git .
37+
38+
if [ "${{ github.event_name }}" == "pull_request" ]; then
39+
git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/head
40+
git checkout pull/${{ github.event.pull_request.number }}/head
41+
42+
# git submodule update --init --recursive
43+
fi
44+
45+
46+
- name: compile
47+
run: |
48+
cd backends/metax_gpu
49+
bash build.sh
50+
51+
- name: run test
52+
run: |
53+
cd backends/metax_gpu/tests
54+
bash run_test.sh -j 16

backends/metax_gpu/CMakeLists.txt

Lines changed: 18 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ include(cblas)
3737
include(flashattn)
3838
include(cutlass)
3939
include(dgc)
40+
include(warpctc)
41+
include(warprnnt)
4042

4143
set(PLUGIN_VERSION ${PADDLE_VERSION})
4244

@@ -308,8 +310,6 @@ file(
308310
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/hinge_loss_grad_kernel.cu
309311
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/hinge_loss_kernel.cu
310312
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gru_grad_kernel.cu
311-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/grid_sample_grad_kernel.cu
312-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/grid_sample_kernel.cu
313313
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/generate_proposals_kernel.cu
314314
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu
315315
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaln_kernel.cu
@@ -612,12 +612,9 @@ file(
612612
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math_function.cc
613613
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_kernel.cu
614614
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_grad_kernel.cu
615-
# ${PADDLE_SOURCE_DIR}/paddle/phi/backends/context_pool.cc
616615
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.cu
617616
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/binomial_kernel.cu
618617
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bernoulli_kernel.cu
619-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_grad_kernel_impl.h
620-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_kernel.cu
621618
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cufft.cc
622619
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/box_coder_kernel.cu
623620
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/broadcast_tensors_kernel.cu
@@ -640,31 +637,12 @@ file(
640637
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_tree_kernel.cu
641638
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_reindex_kernel.cu
642639
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_sample_neighbors_kernel.cu
643-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_kernel.cu
644640
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
645641
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_grad_kernel.cu
646642
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu
647-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_act_dequant_kernel.cu
648-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/block_multi_head_attention_kernel.cu
649-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_weighted_swiglu_act_quant_kernel.cu
650-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_elemwise_activation_kernel.cu
651-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu
652-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/fp8_gemm/fp8_gemm_with_cublasLt/fp8_fp8_half_gemm.cu
653-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_grad_kernel.cu
654-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu
655-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/variable_length_memory_efficient_attention_kernel.cu
656-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_kernel.cu
657-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/gemm_epilogue_kernel.cu
658-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/blha_get_max_len.cu
659-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_elemwise_activation_grad_kernel.cu
660-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/as_real_kernel.cc
661-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/as_complex_kernel.cc
662-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/complex_grad_kernel.cc
663-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/complex_kernel.cc
664-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/shape_kernel.cc
665-
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/conv_kernel_igemm.cu
643+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu
644+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu
666645
# ############################################################################
667-
# kernels/fusion kernels/selected_rows
668646
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
669647
# kernels/kps
670648
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/elementwise_kernel.cu
@@ -696,7 +674,6 @@ file(
696674
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/cal_aux_loss_grad_kernel.cu
697675
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/expand_modality_expert_id_kernel.cu
698676
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/int_bincount_kernel.cu
699-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu
700677
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu)
701678

702679
file(
@@ -706,15 +683,14 @@ file(
706683
passes/*.cc
707684
kernels/*.cc
708685
kernels/*.cu
686+
kernels/fusion/*.cc
687+
kernels/fusion/*.cu
709688
kernels/gpudnn/*.cc
710689
kernels/gpudnn/*.cu
711690
kernels/cuda_kernels/*.cc
712691
kernels/cuda_kernels/*.cu
713692
kernels/funcs/blas/*.cc
714-
kernels/ernie_core/*.cu
715-
kernels/ernie_core/rms_norm_kernel_register.cu
716-
kernels/ernie_core/top_p_sampling_kernel_register.cu
717-
kernels/ernie_core/fused_bias_act_kernel_register.cu)
693+
kernels/ernie_core/*.cu)
718694

719695
set(CUSTOM_DEVICE_SRCS ${CUDA_SRCS} ${CC_SRCS} ${ERNIE_CORE_SRCS})
720696

@@ -723,18 +699,17 @@ set_source_files_properties(${CUSTOM_DEVICE_SRCS} PROPERTIES LANGUAGE CUDA)
723699
set(CMAKE_CUCC_COMPILER "cucc")
724700
set(CMAKE_CUCC_FLAGS "-I /opt/maca/tools/cu-bridge/include/")
725701

726-
set_source_files_properties(
727-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu
728-
PROPERTIES LANGUAGE CUDA)
729-
add_library(
730-
${TARGET_NAME} SHARED
731-
${CUSTOM_DEVICE_SRCS}
732-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu)
702+
add_library(${TARGET_NAME} SHARED ${CUSTOM_DEVICE_SRCS})
733703

734704
target_include_directories(
735705
${TARGET_NAME}
736-
PRIVATE ${PADDLE_SOURCE_DIR} ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/kernels
737-
${CUDA_INCLUDE_DIRS} ${PADDLE_SOURCE_DIR}/third_party/pybind/include
706+
PRIVATE ${PADDLE_SOURCE_DIR}
707+
${CMAKE_SOURCE_DIR}
708+
${CMAKE_SOURCE_DIR}/kernels
709+
${CUDA_INCLUDE_DIRS}
710+
${WARPCTC_INCLUDE_DIR}
711+
${WARPRNNT_INCLUDE_DIR}
712+
${PADDLE_SOURCE_DIR}/third_party/pybind/include
738713
${PADDLE_SOURCE_DIR}/paddle/phi/api/include/compat)
739714

740715
target_link_libraries(
@@ -747,16 +722,16 @@ target_link_libraries(
747722
protobuf
748723
external_error_proto
749724
dgc
725+
${WARPCTC_LIBRARIES}
726+
${WARPRNNT_LIBRARIES}
750727
${PADDLE_CORE_LIB})
751-
target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmccl.so)
752-
target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcFlashAttn.so)
753-
target_link_libraries(${TARGET_NAME} /opt/maca/lib/libmcpti.so)
754728
include_directories(BEFORE ${PADDLE_SOURCE_DIR})
755729

756730
target_compile_definitions(
757731
${TARGET_NAME}
758732
PUBLIC PADDLE_WITH_CUDA=1
759733
PADDLE_WITH_CUSTOM_DEVICE=1
734+
mcblasContext=cublasContext
760735
GPUContext=CustomContext
761736
KPSContext=CustomContext
762737
STREAM_TYPE=cudaStream_t

backends/metax_gpu/build.sh

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,23 @@ set -e
2020
pip uninstall paddlepaddle -y
2121

2222

23-
export http_proxy=http://10.2.192.21:1080 https_proxy=http://10.2.192.21:1080
23+
# init paddle
24+
# git submodule sync --recursive && git submodule update --init --recursive
25+
26+
# sleep 1000000
27+
# unset http_proxy https_proxy
28+
29+
30+
# export http_proxy=https://172.17.0.1:1080 https_proxy=http://10.2.192.21:1080
31+
# export
2432
pip install safetensors==0.6.2 -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package
2533
# install paddle
34+
35+
2636
python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
2737

28-
# exit 1
29-
# init paddle
30-
git submodule sync --recursive && git submodule update --init --recursive
3138

39+
# unset http_proxy https_proxy
3240

3341
# apply patch
3442
bash change_patch.sh
@@ -49,8 +57,8 @@ fi
4957

5058
echo "make_maca"
5159
cd build
52-
cmake_maca .. -DPython3_EXECUTABLE=$(which python3) -DWITH_GPU=ON
53-
make_maca -j8
60+
cmake_maca .. -DCMAKE_BUILD_TYPE=Release -DPython3_EXECUTABLE=$(which python3) -DWITH_GPU=ON
61+
make_maca -j60
5462

5563
echo "install whl"
5664
pip install dist/paddle_metax_gpu*.whl --force-reinstall

backends/metax_gpu/change_patch.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ unzip mcEigen_3.4.0_paddle_final.zip
2121
mv mcEigen_3.4.0_paddle_final eigen3
2222
cd ..
2323
cp -r patch/eigen3/ ../../Paddle/third_party/eigen3
24+
rm -r patch/eigen3
2425
cp patch/tmp/mixed_vector* ../../Paddle/paddle/phi/core
2526
cd ../../Paddle/
2627
git apply --verbose ../backends/metax_gpu/patch/paddle.patch
2728
cd -
29+
# cp -r patch/intrinsics.cuh ../../Paddle/third_party/warpctc/include/contrib/moderngpu/include/device/

0 commit comments

Comments
 (0)