
Commit 21b6a1d

fix
Signed-off-by: yiliu30 <[email protected]>
1 parent 4484882 commit 21b6a1d

File tree

4 files changed (+4 additions, -4 deletions)

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/run_evaluation.sh

1 addition, 1 deletion

@@ -107,7 +107,7 @@ fi
 
 # for fp8 kv cache
 if [[ "$KV_CACHE_DTYPE" == "fp8" ]]; then
-    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=0
+    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=1
     export VLLM_ATTENTION_BACKEND="FLASHINFER_MLA"
     echo "Using FP8 for KV cache"
 fi
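With this change, FlashInfer query quantization is disabled on the FP8 KV-cache path: reading the variable name literally, VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=1 keeps queries in higher precision while the KV cache itself is still stored in FP8. A minimal invocation sketch, assuming the script consumes KV_CACHE_DTYPE from the environment (it may instead accept it as an argument; the diff does not show how the variable is set):

# Hypothetical invocation; KV_CACHE_DTYPE handling is an assumption.
KV_CACHE_DTYPE=fp8 bash run_evaluation.sh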

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/run_generate.sh

1 addition, 1 deletion

@@ -110,7 +110,7 @@ fi
 
 # for fp8 kv cache
 if [[ "$KV_CACHE_DTYPE" == "fp8" ]]; then
-    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=0
+    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=1
     export VLLM_ATTENTION_BACKEND="FLASHINFER_MLA"
     echo "Using FP8 for KV cache"
 fi

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/run_evaluation.sh

1 addition, 1 deletion

@@ -100,7 +100,7 @@ fi
 
 # for fp8 kv cache
 if [[ "$KV_CACHE_DTYPE" == "fp8" ]]; then
-    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=0
+    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=1
    export VLLM_ATTENTION_BACKEND="FLASHINFER"
     echo "Using FP8 for KV cache"
 fi

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/run_generate.sh

1 addition, 1 deletion

@@ -101,7 +101,7 @@ fi
 
 # for fp8 kv cache
 if [[ "$KV_CACHE_DTYPE" == "fp8" ]]; then
-    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=0
+    export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=1
     export VLLM_ATTENTION_BACKEND="FLASHINFER"
     echo "Using FP8 for KV cache"
 fi
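The only difference between the two model directories is the attention backend: the DeepSeek scripts select FLASHINFER_MLA (DeepSeek models use multi-head latent attention), while the Qwen scripts use the standard FLASHINFER backend. A minimal sketch consolidating the pattern from the four scripts above; MODEL_FAMILY is a hypothetical variable, not part of the original scripts:

# Hedged sketch: per-family backend selection mirroring the diffs above.
if [[ "$MODEL_FAMILY" == "deepseek" ]]; then
    export VLLM_ATTENTION_BACKEND="FLASHINFER_MLA"   # MLA attention
else
    export VLLM_ATTENTION_BACKEND="FLASHINFER"       # standard attention
fi
export VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION=1      # per this commit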
