|
29 | 29 | echo "Test with deepseek v2 lite passed"
|
30 | 30 |
|
31 | 31 | # granite + inc
|
32 |
| -#echo "Testing granite-8b + inc with vllm-hpu plugin v1" |
33 |
| -#echo QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model ibm-granite/granite-3.3-2b-instruct --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc |
34 |
| -#QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json \ |
35 |
| -#HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model ibm-granite/granite-3.3-2b-instruct --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc |
36 |
| -#if [ $? -ne 0 ]; then |
37 |
| -# echo "Error: Test failed for granite + inc" >&2 |
38 |
| -# exit -1 |
39 |
| -#fi |
40 |
| -#echo "Test with granite + inc passed" |
# granite + INC (Intel Neural Compressor) quantization smoke test on HPU.
# NOTE(review): message says "granite-8b" but the model run below is
# granite-3.3-2b-instruct — confirm which is intended.
echo "Testing granite-8b + inc with vllm-hpu plugin v1"
# Print the exact command before running it, so CI logs show the full invocation.
echo QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model ibm-granite/granite-3.3-2b-instruct --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc
QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json \
HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model ibm-granite/granite-3.3-2b-instruct --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc
if [ $? -ne 0 ]; then
    echo "Error: Test failed for granite + inc" >&2
    # exit status must be 0-255; 'exit -1' is non-portable (errors in dash,
    # silently becomes 255 in bash) — use 1 to signal failure.
    exit 1
fi
echo "Test with granite + inc passed"
41 | 41 |
|
42 | 42 | # deepseek v2 + inc
|
43 |
| -#echo "Testing deepseek_v2 + inc with vllm-hpu plugin v1" |
44 |
| -#echo QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model deepseek-ai/DeepSeek-V2-Lite-Chat --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc |
45 |
| -#QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json \ |
46 |
| -#HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model deepseek-ai/DeepSeek-V2-Lite-Chat --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc |
47 |
| -#if [ $? -ne 0 ]; then |
48 |
| -# echo "Error: Test failed for deepseek_v2 + inc" >&2 |
49 |
| -# exit -1 |
50 |
| -#fi |
51 |
| -#echo "Test with deepseek_v2 + inc passed" |
# deepseek_v2 + INC (Intel Neural Compressor) quantization smoke test on HPU.
echo "Testing deepseek_v2 + inc with vllm-hpu plugin v1"
# Print the exact command before running it, so CI logs show the full invocation.
echo QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model deepseek-ai/DeepSeek-V2-Lite-Chat --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc
QUANT_CONFIG=vllm-gaudi/tests/models/language/generation/inc_unit_scale_quant.json \
HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-gaudi/tests/full_tests/generate.py --model deepseek-ai/DeepSeek-V2-Lite-Chat --trust-remote-code --quantization inc --kv_cache_dtype fp8_inc
if [ $? -ne 0 ]; then
    echo "Error: Test failed for deepseek_v2 + inc" >&2
    # exit status must be 0-255; 'exit -1' is non-portable (errors in dash,
    # silently becomes 255 in bash) — use 1 to signal failure.
    exit 1
fi
echo "Test with deepseek_v2 + inc passed"
52 | 52 |
|
53 | 53 | # gsm8k test
|
54 | 54 | # used to check HPUattn + MLP
|
|
0 commit comments