
Commit d6e2819

fix: remove deprecated models bagel & psyfighter1 (#92)
1 parent: 5202823

File tree

1 file changed (+0, -47 lines)


modal/runner/containers/vllm_unified.py

Lines changed: 0 additions & 47 deletions
@@ -125,17 +125,6 @@ def __init__(self):
     quantization="GPTQ",
 )
 
-_psyfighter = "TheBloke/Psyfighter-13B-GPTQ"
-VllmContainer_JebCarterPsyfighter13B = _make_container(
-    "VllmContainer_JebCarterPsyfighter13B",
-    model_name=_psyfighter,
-    gpu=modal.gpu.A10G(count=1),
-    concurrent_inputs=4,
-    max_containers=5,
-    container_idle_timeout=2 * 60,
-    quantization="GPTQ",
-)
-
 _psyfighter2 = "TheBloke/LLaMA2-13B-Psyfighter2-GPTQ"
 VllmContainer_KoboldAIPsyfighter2 = _make_container(
     name="VllmContainer_KoboldAIPsyfighter2",
@@ -146,38 +135,6 @@ def __init__(self):
     quantization="GPTQ",
 )
 
-_noromaid = "TheBloke/Noromaid-v0.1-mixtral-8x7b-Instruct-v3-GPTQ"
-VllmContainer_NeverSleepNoromaidMixtral8x7B = _make_container(
-    name="VllmContainer_NeverSleepNoromaidMixtral8x7B",
-    model_name=_noromaid,
-    gpu=modal.gpu.A100(count=1, memory=40),
-    concurrent_inputs=4,
-    max_containers=1,
-    quantization="GPTQ",
-    dtype="float16",  # vLLM errors when using dtype="auto" with this model
-)
-
-_bagel = "TheBloke/bagel-34b-v0.2-GPTQ"
-VllmContainer_JohnDurbinBagel34B = _make_container(
-    name="VllmContainer_JohnDurbinBagel34B",
-    model_name=_bagel,
-    gpu=modal.gpu.A100(count=1, memory=40),
-    concurrent_inputs=4,
-    max_containers=1,
-    max_model_len=8_000,  # Reduced from original 200k
-    quantization="GPTQ",
-    dtype="float16",  # vLLM errors when using dtype="auto" with this model
-)
-
-_midnight_rose = "sambarnes/Midnight-Rose-70B-v2.0.3-GPTQ"
-VllmContainer_MidnightRose70B = _make_container(
-    name="VllmContainer_MidnightRose70B",
-    model_name=_midnight_rose,
-    gpu=modal.gpu.H100(count=1),
-    concurrent_inputs=4,
-    max_containers=1,
-    quantization="GPTQ",
-)
 
 # A re-mapping of model names to their respective quantized models.
 # From the outside, the model name is the original, but internally,
@@ -189,9 +146,5 @@ def __init__(self):
 QUANTIZED_MODELS = {
     "microsoft/phi-2": _phi2,
     "Intel/neural-chat-7b-v3-1": _neural_chat,
-    "jebcarter/Psyfighter-13B": _psyfighter,
     "KoboldAI/LLaMA2-13B-Psyfighter2": _psyfighter2,
-    "NeverSleep/Noromaid-v0.1-mixtral-8x7b-Instruct-v3": _noromaid,
-    "jondurbin/bagel-34b-v0.2": _bagel,
-    "sophosympatheia/Midnight-Rose-70B-v2.0.3": _midnight_rose,
 }
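
For context, the QUANTIZED_MODELS dict touched in the last hunk is a public-to-internal remap: per the file's own comment, callers keep requesting the original model name while the runner internally serves the quantized GPTQ checkpoint. Below is a minimal sketch of that lookup, assuming a plain dict with pass-through fallback; resolve_model_name and the placeholder values for _phi2 and _neural_chat are hypothetical, and only the _psyfighter2 value comes from the diff above.

# Hypothetical sketch, not from this repo: resolving a public model name
# to the quantized checkpoint the runner actually serves.
_phi2 = "example/phi-2-GPTQ"                            # placeholder value
_neural_chat = "example/neural-chat-7b-v3-1-GPTQ"       # placeholder value
_psyfighter2 = "TheBloke/LLaMA2-13B-Psyfighter2-GPTQ"   # from the diff above

QUANTIZED_MODELS = {
    "microsoft/phi-2": _phi2,
    "Intel/neural-chat-7b-v3-1": _neural_chat,
    "KoboldAI/LLaMA2-13B-Psyfighter2": _psyfighter2,
}

def resolve_model_name(public_name: str) -> str:
    # Serve the quantized build when one is registered; otherwise pass
    # the requested name through unchanged.
    return QUANTIZED_MODELS.get(public_name, public_name)

# After this commit, the removed models no longer resolve to a quantized build:
assert resolve_model_name("KoboldAI/LLaMA2-13B-Psyfighter2") == _psyfighter2
assert resolve_model_name("jebcarter/Psyfighter-13B") == "jebcarter/Psyfighter-13B"

Under that reading, the dict doubles as the deprecation switch: deleting a model's entry together with its container definition, as this commit does, is all it takes to stop remapping it.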
