4 changes: 4 additions & 0 deletions src/lighteval/models/vllm/vllm_model.py
@@ -105,6 +105,8 @@ class VLLMModelConfig(ModelConfig):
max_num_batched_tokens: PositiveInt = 2048 # maximum number of tokens per batch
subfolder: str | None = None
is_async: bool = False # Whether to use the async version or sync version of the model
use_dual_chunk_attention: bool = False
Member Author: What version of vllm are you using for this? I get TypeError: EngineArgs.__init__() got an unexpected keyword argument 'use_dual_chunk_attention' with vllm == 0.8.5.post1.

Member: I was on 0.9.1, I think.

Member: (changed my env to the same as yours now)

enforce_eager: bool = False


class VLLMModel(LightevalModel):
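Given the TypeError reported above, one defensive option is to forward the new flag only when the installed vllm's EngineArgs actually accepts it, so older releases such as 0.8.5.post1 keep working. This is a minimal sketch, not what the PR does; the helper name is made up, and importing EngineArgs from the vllm top level is an assumption about recent vllm releases.

import inspect

from vllm import EngineArgs  # assumption: exposed at the top level in recent vllm releases


def build_optional_engine_kwargs(config) -> dict:
    # `config` is assumed to be a VLLMModelConfig as defined in the diff above.
    kwargs = {"enforce_eager": config.enforce_eager}
    # Only forward use_dual_chunk_attention when the installed EngineArgs accepts it,
    # so vllm versions that reject the kwarg with a TypeError still work.
    if "use_dual_chunk_attention" in inspect.signature(EngineArgs).parameters:
        kwargs["use_dual_chunk_attention"] = config.use_dual_chunk_attention
    return kwargs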
@@ -187,6 +189,8 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]:
"seed": int(config.seed),
"max_num_seqs": int(config.max_num_seqs),
"max_num_batched_tokens": int(config.max_num_batched_tokens),
"enforce_eager": config.enforce_eager,
"use_dual_chunk_attention": config.use_dual_chunk_attention,
}

if config.quantization is not None:
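For completeness, a hedged sketch of how a user might enable the two new flags through the config. The model_name field name and the model string are assumptions for illustration; other VLLMModelConfig fields keep their defaults.

from lighteval.models.vllm.vllm_model import VLLMModelConfig

config = VLLMModelConfig(
    model_name="Qwen/Qwen2.5-7B-Instruct",  # hypothetical model choice
    enforce_eager=True,  # skip CUDA graph capture (always run in eager mode)
    use_dual_chunk_attention=True,  # requires a vllm release whose EngineArgs accepts this kwarg
)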