@@ -125,17 +125,6 @@ def __init__(self):
125125 quantization = "GPTQ" ,
126126)
127127
128- _psyfighter = "TheBloke/Psyfighter-13B-GPTQ"
129- VllmContainer_JebCarterPsyfighter13B = _make_container (
130- "VllmContainer_JebCarterPsyfighter13B" ,
131- model_name = _psyfighter ,
132- gpu = modal .gpu .A10G (count = 1 ),
133- concurrent_inputs = 4 ,
134- max_containers = 5 ,
135- container_idle_timeout = 2 * 60 ,
136- quantization = "GPTQ" ,
137- )
138-
139128_psyfighter2 = "TheBloke/LLaMA2-13B-Psyfighter2-GPTQ"
140129VllmContainer_KoboldAIPsyfighter2 = _make_container (
141130 name = "VllmContainer_KoboldAIPsyfighter2" ,
@@ -146,38 +135,6 @@ def __init__(self):
146135 quantization = "GPTQ" ,
147136)
148137
149- _noromaid = "TheBloke/Noromaid-v0.1-mixtral-8x7b-Instruct-v3-GPTQ"
150- VllmContainer_NeverSleepNoromaidMixtral8x7B = _make_container (
151- name = "VllmContainer_NeverSleepNoromaidMixtral8x7B" ,
152- model_name = _noromaid ,
153- gpu = modal .gpu .A100 (count = 1 , memory = 40 ),
154- concurrent_inputs = 4 ,
155- max_containers = 1 ,
156- quantization = "GPTQ" ,
157- dtype = "float16" , # vLLM errors when using dtype="auto" with this model
158- )
159-
160- _bagel = "TheBloke/bagel-34b-v0.2-GPTQ"
161- VllmContainer_JohnDurbinBagel34B = _make_container (
162- name = "VllmContainer_JohnDurbinBagel34B" ,
163- model_name = _bagel ,
164- gpu = modal .gpu .A100 (count = 1 , memory = 40 ),
165- concurrent_inputs = 4 ,
166- max_containers = 1 ,
167- max_model_len = 8_000 , # Reduced from original 200k
168- quantization = "GPTQ" ,
169- dtype = "float16" , # vLLM errors when using dtype="auto" with this model
170- )
171-
172- _midnight_rose = "sambarnes/Midnight-Rose-70B-v2.0.3-GPTQ"
173- VllmContainer_MidnightRose70B = _make_container (
174- name = "VllmContainer_MidnightRose70B" ,
175- model_name = _midnight_rose ,
176- gpu = modal .gpu .H100 (count = 1 ),
177- concurrent_inputs = 4 ,
178- max_containers = 1 ,
179- quantization = "GPTQ" ,
180- )
181138
182139# A re-mapping of model names to their respective quantized models.
183140# From the outside, the model name is the original, but internally,
@@ -189,9 +146,5 @@ def __init__(self):
189146QUANTIZED_MODELS = {
190147 "microsoft/phi-2" : _phi2 ,
191148 "Intel/neural-chat-7b-v3-1" : _neural_chat ,
192- "jebcarter/Psyfighter-13B" : _psyfighter ,
193149 "KoboldAI/LLaMA2-13B-Psyfighter2" : _psyfighter2 ,
194- "NeverSleep/Noromaid-v0.1-mixtral-8x7b-Instruct-v3" : _noromaid ,
195- "jondurbin/bagel-34b-v0.2" : _bagel ,
196- "sophosympatheia/Midnight-Rose-70B-v2.0.3" : _midnight_rose ,
197150}
0 commit comments