We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0ec3779 commit 4ff61ab Copy full SHA for 4ff61ab
.buildkite/scripts/tpu/quantized_v6e_1.env
@@ -0,0 +1,14 @@
1
+# Environment config
2
+TEST_NAME=llama8bw8a8
3
+CONTAINER_NAME=vllm-tpu
4
+
5
+# vLLM config
6
+MODEL=RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
7
+MAX_NUM_SEQS=128
8
+MAX_NUM_BATCHED_TOKENS=1024
9
+TENSOR_PARALLEL_SIZE=1
10
+MAX_MODEL_LEN=2048
11
+DOWNLOAD_DIR=/mnt/disks/persist
12
+EXPECTED_THROUGHPUT=10.0
13
+INPUT_LEN=1800
14
+OUTPUT_LEN=128
0 commit comments