Skip to content

Commit 679cf00

Browse files
authored
Merge branch 'develop' into add_error_log
2 parents baa9896 + 19fda4e commit 679cf00

File tree

140 files changed

+7188
-3899
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

140 files changed

+7188
-3899
lines changed

.github/workflows/_base_test.yml

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
# Reusable workflow: downloads the FastDeploy source archive, installs the
# FastDeploy wheel inside a GPU container, starts the test deployment service
# and runs the base pytest suites under test/ce/server.
name: Base Test
description: "Run Base Tests"

on:
  workflow_call:
    inputs:
      DOCKER_IMAGE:
        description: "Build Images"
        required: true
        type: string
        # NOTE(review): the input is marked required, so this default is
        # presumably never applied - confirm whether it should be optional.
        default: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310"
      FASTDEPLOY_ARCHIVE_URL:
        description: "URL of the compressed FastDeploy code archive."
        required: true
        type: string
      FASTDEPLOY_WHEEL_URL:
        description: "URL of the FastDeploy Wheel."
        required: true
        type: string
      CACHE_DIR:
        description: "Cache Dir Use"
        required: false
        type: string
        default: ""
      MODEL_CACHE_DIR:
        description: "Model Cache Dir Use"
        required: false
        type: string
        default: ""

jobs:
  base_tests:
    runs-on: [self-hosted, GPU-h20-1Cards]
    steps:
      - name: Code Prepare
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
        run: |
          set -x
          FULL_REPO="${{ github.repository }}"
          # Keep only the part after the last "/" (the repository name).
          REPO_NAME="${FULL_REPO##*/}"

          # Clean the repository directory before starting.
          # The removal runs inside the container image rather than directly on the runner.
          docker run --rm --net=host -v "$(pwd):/workspace" -w /workspace \
            -e "REPO_NAME=${REPO_NAME}" \
            "${docker_image}" /bin/bash -c '
            if [ -d "${REPO_NAME}" ]; then
              echo "Directory ${REPO_NAME} exists, removing it..."
              rm -rf "${REPO_NAME}"*
            fi
          '

          # The archive is expected to be named FastDeploy.tar.gz and to unpack into ./FastDeploy.
          wget -q "${fd_archive_url}"
          tar -xf FastDeploy.tar.gz
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
          # NOTE(review): the address was obscured in the captured page; a
          # reserved-domain placeholder is used here - confirm the real value.
          git config --global user.email "fastdeploy_ci@example.com"
          git log -n 3 --oneline

      - name: Run FastDeploy Base Tests
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fastdeploy_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
          CACHE_DIR: ${{ inputs.CACHE_DIR }}
          MODEL_CACHE_DIR: ${{ inputs.MODEL_CACHE_DIR }}
        run: |
          # Use the trailing digit (0-7) of the runner name as the GPU index; fall back to 0.
          runner_name="${{ runner.name }}"
          last_char="${runner_name: -1}"

          if [[ "$last_char" =~ [0-7] ]]; then
            DEVICES="$last_char"
          else
            DEVICES="0"
          fi

          # Offset every port by 100 per device index.
          FLASK_PORT=$((42068 + DEVICES * 100))
          FD_API_PORT=$((42088 + DEVICES * 100))
          FD_ENGINE_QUEUE_PORT=$((42058 + DEVICES * 100))
          FD_METRICS_PORT=$((42078 + DEVICES * 100))
          echo "Test ENV Parameter:"
          echo "========================================================="
          echo "FLASK_PORT=${FLASK_PORT}"
          echo "FD_API_PORT=${FD_API_PORT}"
          echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
          echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
          echo "DEVICES=${DEVICES}"
          echo "========================================================="

          # Default the cache directory to two levels above the workspace.
          CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
          echo "CACHE_DIR is set to ${CACHE_DIR}"
          # The gitconfig file is bind-mounted below, so it has to exist beforehand.
          if [ ! -f "${CACHE_DIR}/gitconfig" ]; then
            touch "${CACHE_DIR}/gitconfig"
          fi
          if [ ! -d "${MODEL_CACHE_DIR}" ]; then
            echo "Error: MODEL_CACHE_DIR '${MODEL_CACHE_DIR}' does not exist."
            exit 1
          fi

          docker run --rm --ipc=host --pid=host --net=host \
            -v "$(pwd):/workspace" \
            -w /workspace \
            -e "fastdeploy_wheel_url=${fastdeploy_wheel_url}" \
            -e "FD_API_PORT=${FD_API_PORT}" \
            -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
            -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
            -e "FLASK_PORT=${FLASK_PORT}" \
            -v "${MODEL_CACHE_DIR}:/MODELDATA" \
            -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
            -v "${CACHE_DIR}/.cache:/root/.cache" \
            -v "${CACHE_DIR}/ConfigDir:/root/.config" \
            -e TZ="Asia/Shanghai" \
            --gpus '"device='"${DEVICES}"'"' "${docker_image}" /bin/bash -xc '
            # python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
            python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/

            pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

            python -m pip install "${fastdeploy_wheel_url}"
            python -m pip install pytest

            wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
            chmod +x ./llm-deploy-linux-amd64
            ./llm-deploy-linux-amd64 -python python3.10 \
              -model_name ERNIE-4.5-0.3B-Paddle \
              -model_path /MODELDATA \
              --skip install

            git config --global --add safe.directory /workspace/FastDeploy
            cd FastDeploy
            pushd test/ce/deploy
            # Start the deploy helper in the background, then ask it to launch the model service.
            python3.10 deploy.py > dd.log 2>&1 &
            sleep 3
            curl -X POST "http://0.0.0.0:${FLASK_PORT}/start" \
              -H "Content-Type: application/json" \
              -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\"}"

            # The URL is quoted so that the "?" is never treated as a glob character.
            curl -X POST "http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90"
            popd

            pushd test/ce/server
            export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
            export TEMPLATE=TOKEN_LOGPROB
            # Record failures without aborting, so that both pytest runs always execute.
            TEST_EXIT_CODE=0
            python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py || TEST_EXIT_CODE=1
            # Restart the service with early stop enabled before the repetition test.
            curl -X POST "http://0.0.0.0:${FLASK_PORT}/switch" \
              -H "Content-Type: application/json" \
              -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--early-stop-config\": \"{\\\"enable_early_stop\\\":true, \\\"window_size\\\":6, \\\"threshold\\\":0.93}\"}"
            curl -X POST "http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90"
            python -m pytest -sv test_repetition_early_stop.py || TEST_EXIT_CODE=1
            popd
            # Hand the result back to the host through the mounted workspace.
            echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
          '
          if [ -f ./FastDeploy/exit_code.env ]; then
            source ./FastDeploy/exit_code.env
            cat ./FastDeploy/exit_code.env >> "$GITHUB_ENV"
          else
            # Without the result file the tests never reported a status, so
            # the step must not be allowed to pass.
            echo "Error: ./FastDeploy/exit_code.env was not produced; treating the run as failed."
            TEST_EXIT_CODE=1
          fi
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
          # Fall back to failure if the sourced file did not define the variable.
          exit "${TEST_EXIT_CODE:-1}"

.github/workflows/_build_linux.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,7 @@ jobs:
125125
export FASTDEPLOY_VERSION="${FASTDEPLOY_VERSION}.dev${DATE_ONLY}"
126126
fi
127127
python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
128-
pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/
129-
pip config set install.trusted-host pip.baidu.com
130-
pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
128+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
131129
132130
python -m pip install --upgrade pip
133131
python -m pip install -r requirements.txt

.github/workflows/_clone_linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ jobs:
6868
branch_name=${{ github.ref_name }}
6969
target_path=paddle-github-action/BRANCH/FastDeploy/${branch_name}/${commit_id}
7070
fi
71-
wget -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py
71+
wget -O bos_tools.py -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py
7272
push_file=$(realpath bos_tools.py)
7373
python -m pip install bce-python-sdk==0.9.29
7474
ls

.github/workflows/_logprob_test_linux.yml

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,18 @@ jobs:
7070
DEVICES="0"
7171
fi
7272
73-
FLASK_PORT=$((9160 + DEVICES * 100))
74-
FD_API_PORT=$((9180 + DEVICES * 100))
75-
FD_ENGINE_QUEUE_PORT=$((9150 + DEVICES * 100))
76-
FD_METRICS_PORT=$((9170 + DEVICES * 100))
73+
FLASK_PORT=$((42068 + DEVICES * 100))
74+
FD_API_PORT=$((42088 + DEVICES * 100))
75+
FD_ENGINE_QUEUE_PORT=$((42058 + DEVICES * 100))
76+
FD_METRICS_PORT=$((42078 + DEVICES * 100))
77+
echo "Test ENV Parameter:"
78+
echo "========================================================="
79+
echo "FLASK_PORT=${FLASK_PORT}"
80+
echo "FD_API_PORT=${FD_API_PORT}"
81+
echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
82+
echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
83+
echo "DEVICES=${DEVICES}"
84+
echo "========================================================="
7785
7886
CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
7987
echo "CACHE_DIR is set to ${CACHE_DIR}"
@@ -86,8 +94,10 @@ jobs:
8694
fi
8795
8896
PARENT_DIR=$(dirname "$WORKSPACE")
97+
unset http_proxy
98+
unset https_proxy
8999
90-
docker run --ipc=host --pid=host --net=host \
100+
docker run --rm --ipc=host --pid=host --net=host \
91101
-v $(pwd):/workspace \
92102
-w /workspace \
93103
-e fastdeploy_wheel_url=${fastdeploy_wheel_url} \
@@ -100,13 +110,12 @@ jobs:
100110
-v "${CACHE_DIR}/.cache:/root/.cache" \
101111
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
102112
-e TZ="Asia/Shanghai" \
103-
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -c '
113+
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
104114
# python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
105115
python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
106116
107-
pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/
108-
pip config set install.trusted-host pip.baidu.com
109-
pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
117+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
118+
110119
python -m pip install ${fastdeploy_wheel_url}
111120
112121
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
@@ -124,6 +133,10 @@ jobs:
124133
-d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\"}"
125134
126135
curl -X POST http://localhost:${FLASK_PORT}/wait_for_infer?timeout=90
136+
curl -s -o /dev/null -w "%{http_code}" -m 2 "http://0.0.0.0:${FD_API_PORT}/health"
137+
curl -X POST "http://0.0.0.0:${FD_API_PORT}/v1/chat/completions" \
138+
-H "Content-Type: application/json" \
139+
-d "{\"messages\": [{\"role\": \"user\", \"content\": \"1+1=?\"}], \"logprobs\": true}"
127140
set +e
128141
rm -rf ./baseline_output
129142
cp -r baseline/ERNIE-4.5-0.3B-Paddle ./baseline_output

.github/workflows/_unit_test_coverage.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,8 @@ jobs:
9696
# python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
9797
python -m pip install paddlepaddle-gpu==3.0.0.dev20250729 -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
9898
99-
pip config set global.index-url http://pip.baidu.com/root/baidu/+simple/
100-
pip config set install.trusted-host pip.baidu.com
101-
pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
99+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
100+
102101
103102
python -m pip install coverage
104103
python -m pip install diff-cover

.github/workflows/ci_gcu.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ concurrency:
1313

1414
jobs:
1515
CI_GCU:
16-
runs-on: [self-hosted, GCU-S60-8Card]
16+
runs-on:
17+
group: GCU
1718
steps:
1819
- name: Print current runner name
1920
run: |

.github/workflows/ci_iluvatar.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ concurrency:
1111

1212
jobs:
1313
CI_ILUVATAR:
14-
runs-on: [self-hosted, IXUCA]
14+
runs-on:
15+
group: IXUCA
1516
steps:
1617
- name: Print current runner name
1718
run: |

.github/workflows/pr_build_and_test.yml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
needs: clone
2020
uses: ./.github/workflows/_build_linux.yml
2121
with:
22-
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310
22+
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
2323
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
2424
COMPILE_ARCH: "89,90"
2525
WITH_NIGHTLY_BUILD: "OFF"
@@ -39,7 +39,7 @@ jobs:
3939
needs: [clone,build]
4040
uses: ./.github/workflows/_unit_test_coverage.yml
4141
with:
42-
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310
42+
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
4343
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
4444
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
4545

@@ -48,7 +48,7 @@ jobs:
4848
needs: [build]
4949
uses: ./.github/workflows/_logprob_test_linux.yml
5050
with:
51-
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:cuda126-py310
51+
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
5252
PADDLETEST_ARCHIVE_URL: "https://xly-devops.bj.bcebos.com/PaddleTest/PaddleTest.tar.gz"
5353
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
5454
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelCache"
@@ -61,3 +61,13 @@ jobs:
6161
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:fastdeploy-ciuse-cuda126
6262
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
6363
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
64+
65+
base_test:
66+
name: Run Base Tests
67+
needs: [clone,build]
68+
uses: ./.github/workflows/_base_test.yml
69+
with:
70+
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
71+
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
72+
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
73+
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelCache"

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,6 @@ build
167167
.ccls-cache
168168

169169
third_party
170+
171+
custom_ops/gpu_ops/w4afp8_gemm/w4afp8_gemm_*.cu
172+
custom_ops/gpu_ops/w4afp8_gemm/w4afp8_gemm_template.h

custom_ops/gpu_ops/append_attn/append_attention_c16_impl.cuh

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,12 +1061,11 @@ void MultiQueryAppendAttention(
10611061
if (!is_decoder) {
10621062
chunk_size = static_cast<uint32_t>(encoder_max_partition_size);
10631063
}
1064-
const int num_chunks = div_up(max_dec_len, chunk_size);
10651064

1065+
const int num_chunks = div_up(max_seq_len, chunk_size);
10661066
dim3 grids(num_blocks_x_cpu, num_chunks, kv_num_heads);
10671067
dim3 blocks(32, num_warps);
1068-
1069-
if (num_chunks <= 1) {
1068+
if (num_chunks <= 0) {
10701069
auto nosplit_kv_kernel =
10711070
multi_query_append_attention_warp1_4_kernel<NV_TYPE,
10721071
false,
@@ -1161,8 +1160,8 @@ void MultiQueryAppendAttention(
11611160
reinterpret_cast<NV_TYPE *>(const_cast<T *>(cache_k.data<T>())),
11621161
reinterpret_cast<NV_TYPE *>(const_cast<T *>(cache_v.data<T>())),
11631162
shift_bias ? reinterpret_cast<NV_TYPE *>(
1164-
const_cast<T *>(shift_bias.get().data<T>()))
1165-
: nullptr,
1163+
const_cast<T *>(shift_bias.get().data<T>()))
1164+
: nullptr,
11661165
smooth_weight ? reinterpret_cast<NV_TYPE *>(
11671166
const_cast<T *>(smooth_weight.get().data<T>()))
11681167
: nullptr,
@@ -1208,8 +1207,8 @@ void MultiQueryAppendAttention(
12081207
seq_lens_encoder.data<int>(),
12091208
cu_seqlens_q.data<int>(),
12101209
shift_bias ? reinterpret_cast<NV_TYPE *>(
1211-
const_cast<T *>(shift_bias.get().data<T>()))
1212-
: nullptr,
1210+
const_cast<T *>(shift_bias.get().data<T>()))
1211+
: nullptr,
12131212
smooth_weight ? reinterpret_cast<NV_TYPE *>(const_cast<T *>(
12141213
smooth_weight.get().data<T>()))
12151214
: nullptr,
@@ -1226,14 +1225,14 @@ void MultiQueryAppendAttention(
12261225
constexpr int blockx = HEAD_DIM / vec_size;
12271226
constexpr int blocky = (128 + blockx - 1) / blockx;
12281227
dim3 grids_merge(min(sm_count * 4, token_num),
1229-
num_heads);
1228+
num_heads);
12301229
dim3 blocks_merge(blockx, blocky);
12311230
merge_multi_chunks_v2_kernel<NV_TYPE,
1232-
vec_size,
1233-
blocky,
1234-
HEAD_DIM,
1235-
OUT_NV_TYPE,
1236-
ENABLE_PREFILL>
1231+
vec_size,
1232+
blocky,
1233+
HEAD_DIM,
1234+
OUT_NV_TYPE,
1235+
ENABLE_PREFILL>
12371236
<<<grids_merge, blocks_merge, 0, stream>>>(
12381237
reinterpret_cast<NV_TYPE *>(tmp_workspace->ptr()),
12391238
static_cast<float *>(tmp_m->ptr()),
@@ -1244,8 +1243,8 @@ void MultiQueryAppendAttention(
12441243
batch_id_per_token.data<int>(),
12451244
cu_seqlens_q.data<int>(),
12461245
shift_bias ? reinterpret_cast<NV_TYPE *>(
1247-
const_cast<T *>(shift_bias.get().data<T>()))
1248-
: nullptr,
1246+
const_cast<T *>(shift_bias.get().data<T>()))
1247+
: nullptr,
12491248
smooth_weight ? reinterpret_cast<NV_TYPE *>(const_cast<T *>(
12501249
smooth_weight.get().data<T>()))
12511250
: nullptr,

0 commit comments

Comments
 (0)