Commit b590adf

Fix vLLM x torch.compile config caching (#16491)
Signed-off-by: rzou <[email protected]>
1 parent: b4fe16c

1 file changed: 9 additions, 3 deletions

vllm/config.py

Lines changed: 9 additions & 3 deletions
@@ -298,12 +298,18 @@ def compute_hash(self) -> str:
         factors.append(self.quantization)
         factors.append(self.revision)
         factors.append(self.code_revision)
+        factors.append(self.max_model_len)
+        factors.append(self.max_logprobs)
+        factors.append(self.disable_sliding_window)
         factors.append(self.trust_remote_code)
+        factors.append(self.mm_processor_kwargs)
+        factors.append(self.generation_config)
+        factors.append(self.model_impl)
+        factors.append(self.override_generation_config)
         factors.append(self.rope_scaling)
         factors.append(self.rope_theta)
-        # rope cos/sin cache depends on the max_position_embeddings
-        factors.append(
-            getattr(self.hf_config, "max_position_embeddings", "None"))
+        # hf_config can control how the model looks!
+        factors.append(self.hf_config.to_json_string())
         return hashlib.sha256(str(factors).encode()).hexdigest()
 
     def __init__(
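For context on the hunk above: compute_hash folds every configuration field that can change the compiled model into a single SHA-256 digest, which is then used as a cache key for vLLM's torch.compile artifacts. The sketch below is a minimal, hypothetical illustration of that idea, not vLLM's actual ModelConfig: FakeHFConfig and this standalone compute_hash are made-up stand-ins. It shows why hashing hf_config.to_json_string() invalidates the cache whenever any hf_config field changes, whereas the old code only tracked max_position_embeddings.

# Minimal sketch (assumed names, not vLLM's real classes) of the
# factor-hashing idea: every field that can affect compiled-model
# behavior is folded into one SHA-256 digest usable as a cache key.
import hashlib
from typing import Optional


class FakeHFConfig:
    """Hypothetical stand-in for a transformers PretrainedConfig."""

    def __init__(self, max_position_embeddings: int,
                 sliding_window: Optional[int]):
        self.max_position_embeddings = max_position_embeddings
        self.sliding_window = sliding_window

    def to_json_string(self) -> str:
        # The real PretrainedConfig.to_json_string() serializes every
        # field, which is what lets hashing it cover "how the model looks".
        return str(sorted(self.__dict__.items()))


def compute_hash(max_model_len: int, trust_remote_code: bool,
                 hf_config: FakeHFConfig) -> str:
    factors: list = []
    factors.append(max_model_len)
    factors.append(trust_remote_code)
    # After the fix: hash the whole hf_config, not just
    # max_position_embeddings.
    factors.append(hf_config.to_json_string())
    return hashlib.sha256(str(factors).encode()).hexdigest()


# Changing any hf_config field now yields a different cache key,
# so a stale compiled artifact cannot be reused by mistake.
a = compute_hash(4096, False, FakeHFConfig(4096, None))
b = compute_hash(4096, False, FakeHFConfig(4096, 1024))
assert a != b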
