Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 10 additions & 24 deletions .azure-pipelines/scripts/cuda_unit_test/run_cuda_ut.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,9 @@ function run_unit_test() {
# install unit test dependencies
echo "##[group]set up UT env..."
cd "${BUILD_SOURCESDIRECTORY}" || exit 1
uv pip install torch==2.10.0 torchvision
uv pip install git+https://github.com/casper-hansen/AutoAWQ.git --no-build-isolation

# install gptqmodel
CUDA_VER=$(python -c 'import torch; print(f"cu{torch.version.cuda.replace(".", "")}")')
PY_VER=$(python -c 'import sys; print(f"cp{sys.version_info.major}{sys.version_info.minor}")')
TORCH_VER="torch2.10"
WHEEL="gptqmodel-5.7.0-${CUDA_VER}${TORCH_VER}-${PY_VER}-${PY_VER}-linux_x86_64.whl"
URL="https://pkgs.dev.azure.com/lpot-inc/b7121868-d73a-4794-90c1-23135f974d09/_packaging/4728fbab-e069-4cbd-bcca-d35f4d42256b/pypi/download/gptqmodel/5.7/${WHEEL}"
wget -q "$URL" -O "$WHEEL" || { echo "Download failed. Check CUDA/PyTorch/Python versions match (cu126/cu128/cu130, torch2.10, cp310-cp313)"; exit 1; }
mv "$WHEEL" "${WHEEL/-${CUDA_VER}${TORCH_VER}-/+${CUDA_VER}.${TORCH_VER}-}"
uv pip install "./${WHEEL/-${CUDA_VER}${TORCH_VER}-/+${CUDA_VER}.${TORCH_VER}-}" --no-build-isolation
rm -f "./${WHEEL/-${CUDA_VER}${TORCH_VER}-/+${CUDA_VER}.${TORCH_VER}-}"

uv pip install gptqmodel --extra-index-url https://pkgs.dev.azure.com/lpot-inc/neural-compressor/_packaging/gptqmodel-wheels/pypi/simple/
uv pip install -r https://raw.githubusercontent.com/ModelCloud/GPTQModel/refs/tags/v5.7.0/requirements.txt
uv pip install torch==2.11.0 torchvision --index-url https://download.pytorch.org/whl/cu128
uv pip install https://github.com/XuehaoSun/GPTQModel/releases/download/v5.8.0/gptqmodel-5.8.0+cu128torch2.11-cp312-cp312-linux_x86_64.whl
uv pip install -r https://raw.githubusercontent.com/ModelCloud/GPTQModel/refs/tags/v5.8.0/requirements.txt
uv pip install https://github.com/XuehaoSun/llama-cpp-python/releases/download/v0.3.16/llama_cpp_python-0.3.16-cp312-cp312-linux_x86_64.whl
uv pip install 'git+https://github.com/ggml-org/llama.cpp.git#subdirectory=gguf-py'
uv pip install -r test/test_cuda/requirements.txt
Expand Down Expand Up @@ -126,8 +113,9 @@ function run_unit_test_llmc() {
uv pip install -U pytest-cov pytest-html
uv pip install -r test/test_cuda/requirements_llmc.txt
uv pip install .
echo "##[endgroup]"
uv pip list
echo "##[endgroup]"

cd "${BUILD_SOURCESDIRECTORY}/test" || exit 1

export COVERAGE_RCFILE="${BUILD_SOURCESDIRECTORY}/.azure-pipelines/scripts/ut/.coverage"
Expand All @@ -151,9 +139,9 @@ function run_unit_test_sglang() {
uv pip install -U pytest-cov pytest-html
uv pip install -r test/test_cuda/requirements_sglang.txt
uv pip install .
uv pip list
echo "##[endgroup]"

uv pip list
cd "${BUILD_SOURCESDIRECTORY}/test" || exit 1
export COVERAGE_RCFILE="${BUILD_SOURCESDIRECTORY}/.azure-pipelines/scripts/ut/.coverage"

Expand All @@ -176,9 +164,9 @@ function run_unit_test_vllm() {
uv pip install -U pytest-cov pytest-html
uv pip install -r test/test_cuda/requirements_vllm.txt
uv pip install .
uv pip list
echo "##[endgroup]"

uv pip list
cd "${BUILD_SOURCESDIRECTORY}/test" || exit 1
export COVERAGE_RCFILE="${BUILD_SOURCESDIRECTORY}/.azure-pipelines/scripts/ut/.coverage"

Expand All @@ -197,16 +185,14 @@ function main() {
setup_environment
if [ "${test_case}" == "vlm" ]; then
run_unit_test_vlm
elif [ "${test_case}" == "llmc" ]; then
run_unit_test_llmc
elif [ "${test_case}" == "sglang" ]; then
elif [ "${test_case}" == "specific" ]; then
run_unit_test_sglang
elif [ "${test_case}" == "vllm" ]; then
run_unit_test_llmc
run_unit_test_vllm
elif [ "${test_case}" == "all" ]; then
run_unit_test
else
echo "##[error]Invalid test case specified: ${test_case}. Please use 'vlm', 'llmc', 'sglang', 'vllm', or 'all'."
echo "##[error]Invalid test case specified: ${test_case}. Please use 'vlm', 'specific', or 'all'."
exit 1
fi
check_storage_usage
Expand Down
28 changes: 20 additions & 8 deletions .azure-pipelines/scripts/cuda_unit_test/runpod_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,27 @@ def check_gpu_count(token):
def run_create_pod(api_key, payload):
url = "https://rest.runpod.io/v1/pods"
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
response = requests.post(url, json=payload, headers=headers)
max_retries = 3

for attempt in range(max_retries + 1):
response = requests.post(url, json=payload, headers=headers)

if response.status_code >= 500:
if attempt < max_retries:
print(f"⚠️ {response.status_code} Error, Retrying in 60 seconds ({attempt + 1}/{max_retries})...")
time.sleep(60)
continue
else:
print(f"❌ {response.status_code} Error, Reached maximum retry attempts ({max_retries}), giving up.")
response.raise_for_status()

response.raise_for_status()
result = response.json()
if "errors" in result:
print("❌ Errors:")
print(json.dumps(result["errors"], indent=2))
sys.exit(1)
return result
result = response.json()
if "errors" in result:
print("❌ Errors:")
print(json.dumps(result["errors"], indent=2))
sys.exit(1)

return result


def create_pod(args):
Expand Down
12 changes: 4 additions & 8 deletions .azure-pipelines/scripts/ut/run_ut_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ function run_unit_test() {
cd ${REPO_PATH}/test
rm -rf .coverage* *.xml *.html

uv pip install torch==2.10.0 torchvision
uv pip install -v git+https://github.com/casper-hansen/AutoAWQ.git --no-build-isolation
uv pip install torch==2.11.0 torchvision --index-url https://download.pytorch.org/whl/cu128
uv pip install gptqmodel --no-build-isolation
uv pip install -r https://raw.githubusercontent.com/ModelCloud/GPTQModel/refs/heads/main/requirements.txt
CMAKE_ARGS="-DGGML_CUDA=on -DLLAVA_BUILD=off" uv pip install llama-cpp-python
Expand Down Expand Up @@ -132,14 +131,11 @@ function run_unit_test_vlm() {
cd ${REPO_PATH}/test
rm -rf .coverage* *.xml *.html

uv pip install torch==2.10.0 torchvision
uv pip install torch==2.11.0 torchvision --index-url https://download.pytorch.org/whl/cu128
uv pip install gptqmodel --no-build-isolation
uv pip install git+https://github.com/haotian-liu/LLaVA.git@v1.2.2 --no-deps
local site_path=$(python -c "import site; print(site.getsitepackages()[0])")
# reference https://github.com/haotian-liu/LLaVA/issues/1448#issuecomment-2119845242
sed -i '/inputs\[.*image_sizes.*\] = image_sizes/a\ inputs.pop("cache_position")' ${site_path}/llava/model/language_model/llava_llama.py
uv pip install git+https://github.com/deepseek-ai/DeepSeek-VL2.git timm attrdict --no-deps
uv pip install -v git+https://github.com/casper-hansen/AutoAWQ.git@v0.2.0 --no-build-isolation
uv pip install flash-attn==2.7.4.post1 --no-build-isolation
uv pip install flash-attn==2.8.3 --no-build-isolation
uv pip install -r test_cuda/requirements_vlm.txt
cd ${REPO_PATH} && uv pip install . && cd ${REPO_PATH}/test

Expand Down
8 changes: 4 additions & 4 deletions .azure-pipelines/template/ut-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,17 @@ steps:
- script: |
if [ "${{ parameters.utScriptFileName }}" == "run_ut_xpu" ];then
docker exec ${{ parameters.utContainerName }} bash -c "cd /auto-round \
&& uv pip install torch==2.10.0 torchvision --index-url https://download.pytorch.org/whl/xpu \
&& uv pip install torch==2.10.0 auto-round-lib \
&& uv pip install torch==2.11.0 torchvision --index-url https://download.pytorch.org/whl/xpu \
&& uv pip install torch==2.11.0 auto-round-lib \
&& uv pip install -r requirements.txt \
&& uv pip install -r test/test_ark/requirements.txt \
&& uv pip install -r test/test_xpu/requirements.txt \
&& cd /auto-round && uv pip install . \
&& uv pip list"
else
docker exec ${{ parameters.utContainerName }} bash -c "cd /auto-round \
&& uv pip install torch==2.10.0 torchvision --index-url https://download.pytorch.org/whl/cpu \
&& uv pip install torch==2.10.0 auto-round-lib \
&& uv pip install torch==2.11.0 torchvision --index-url https://download.pytorch.org/whl/cpu \
&& uv pip install torch==2.11.0 auto-round-lib \
&& uv pip install -r requirements.txt \
&& uv pip install -r requirements-cpu.txt \
&& uv pip install -r test/test_cpu/requirements.txt \
Expand Down
6 changes: 1 addition & 5 deletions .azure-pipelines/unit-test-cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,7 @@ stages:
if [ ${{ pair.value.PART }} -eq 0 ]; then
export UV_NO_CACHE=0
bash .azure-pipelines/scripts/cuda_unit_test/run_cuda_ut.sh \
--test_case=sglang
bash .azure-pipelines/scripts/cuda_unit_test/run_cuda_ut.sh \
--test_case=llmc
bash .azure-pipelines/scripts/cuda_unit_test/run_cuda_ut.sh \
--test_case=vllm
--test_case=specific
else
bash .azure-pipelines/scripts/cuda_unit_test/run_cuda_ut.sh \
--test_case=all \
Expand Down
Loading