Skip to content

Commit f7244d1

Browse files
authored
fix: Fix flaky KVBM disagg accuracy test (#4135)
Signed-off-by: jthomson04 <[email protected]>
1 parent 3d036fc commit f7244d1

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

tests/kvbm_integration/test_determinism_disagg.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,21 @@ def start_server(self, timeout: int = 300) -> bool:
213213
# Give frontend time to start up
214214
time.sleep(5)
215215

216+
model = os.environ.get(
217+
"KVBM_MODEL_ID", "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
218+
)
219+
220+
# Try to download the model.
221+
print("Attempting model download...")
222+
try:
223+
subprocess.run(
224+
f"pip install hf_transfer && HF_HUB_ENABLE_HF_TRANSFER=1 hf download {model}",
225+
check=True,
226+
shell=True,
227+
)
228+
except subprocess.CalledProcessError:
229+
print("Model download failed. Is this a locally stored model?")
230+
216231
# Launch decoder
217232
self.process_decoder = subprocess.Popen(
218233
self.decoder_cmd,
@@ -223,11 +238,6 @@ def start_server(self, timeout: int = 300) -> bool:
223238
)
224239
print(f"Decoder process started with PID: {self.process_decoder.pid}")
225240

226-
# The prefiller and decoder cannot download the model simultaneously,
227-
# because the Hugging Face rust library (invoked by fetch_llm) needs to hold an exclusive lock on the model files.
228-
print("Sleeping for 60 seconds to allow the decoder to download the model. ")
229-
time.sleep(60)
230-
231241
# Launch prefiller
232242
self.process_prefiller = subprocess.Popen(
233243
self.prefiller_cmd,

0 commit comments

Comments
 (0)