File tree Expand file tree Collapse file tree 2 files changed +39
-15
lines changed
Expand file tree Collapse file tree 2 files changed +39
-15
lines changed
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ This document tracks all model weights available in the `/model-weights` directory
40 40   | `gemma-2b-it` | ❌ |
41 41   | `gemma-7b` | ❌ |
42 42   | `gemma-7b-it` | ❌ |
   43 + | `gemma-2-2b-it` | ✅ |
43 44   | `gemma-2-9b` | ✅ |
44 45   | `gemma-2-9b-it` | ✅ |
45 46   | `gemma-2-27b` | ✅ |
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,18 @@ models:
126126 --tensor-parallel-size : 4
127127 --max-model-len : 4096
128128 --max-num-seqs : 256
129+ gemma-2-2b-it :
130+ model_family : gemma-2
131+ model_variant : 2b-it
132+ model_type : LLM
133+ gpus_per_node : 1
134+ num_nodes : 1
135+ vocab_size : 256000
136+ time : 08:00:00
137+ resource_type : l40s
138+ vllm_args :
139+ --max-model-len : 4096
140+ --max-num-seqs : 256
129141 gemma-2-9b :
130142 model_family : gemma-2
131143 model_variant : 9b
@@ -406,8 +418,7 @@ models:
406418 gpus_per_node : 4
407419 num_nodes : 8
408420 vocab_size : 128256
409- qos : m4
410- time : 02:00:00
421+ time : 08:00:00
411422 resource_type : l40s
412423 vllm_args :
413424 --pipeline-parallel-size : 8
@@ -557,19 +568,6 @@ models:
557568 --tensor-parallel-size : 2
558569 --max-model-len : 65536
559570 --max-num-seqs : 256
560- Llama3-OpenBioLLM-70B :
561- model_family : Llama3-OpenBioLLM
562- model_variant : 70B
563- model_type : LLM
564- gpus_per_node : 4
565- num_nodes : 1
566- vocab_size : 128256
567- time : 08:00:00
568- resource_type : l40s
569- vllm_args :
570- --tensor-parallel-size : 4
571- --max-model-len : 8192
572- --max-num-seqs : 256
573571 Llama-3.1-Nemotron-70B-Instruct-HF :
574572 model_family : Llama-3.1-Nemotron
575573 model_variant : 70B-Instruct-HF
@@ -1107,6 +1105,18 @@ models:
11071105 vllm_args :
11081106 --max-model-len : 4096
11091107 --max-num-seqs : 256
1108+ Qwen3-8B :
1109+ model_family : Qwen3
1110+ model_variant : 8B
1111+ model_type : LLM
1112+ gpus_per_node : 1
1113+ num_nodes : 1
1114+ vocab_size : 151936
1115+ time : 08:00:00
1116+ resource_type : l40s
1117+ vllm_args :
1118+ --max-model-len : 40960
1119+ --max-num-seqs : 256
11101120 Qwen3-14B :
11111121 model_family : Qwen3
11121122 model_variant : 14B
@@ -1119,3 +1129,16 @@ models:
11191129 vllm_args :
11201130 --max-model-len : 40960
11211131 --max-num-seqs : 256
1132+ Qwen3-32B :
1133+ model_family : Qwen3
1134+ model_variant : 32B
1135+ model_type : LLM
1136+ gpus_per_node : 2
1137+ num_nodes : 1
1138+ vocab_size : 151936
1139+ time : 08:00:00
1140+ resource_type : l40s
1141+ vllm_args :
1142+ --tensor-parallel-size : 2
1143+ --max-model-len : 40960
1144+ --max-num-seqs : 256
You can’t perform that action at this time.
0 commit comments