Commit dbfa656: Minor Cleanup

Parent: b5c1703
2 files changed, 3 insertions(+), 5 deletions(-)


vllm_gaudi/ops/hpu_lora.py (0 additions, 1 deletion)

@@ -1,7 +1,6 @@
 import torch
 import torch.nn.functional as F
 from vllm.model_executor.custom_op import CustomOp
-from vllm.lora import layers
 from vllm.lora.layers import VocabParallelEmbeddingWithLoRA
 
 

vllm_gaudi/v1/worker/hpu_model_runner.py (3 additions, 4 deletions)

@@ -442,10 +442,9 @@ def generate_proposals(self, *args, **kwargs):
 
 
 def _maybe_wrap_in_hpu_graph(*args, **kwargs):
-    '''return htorch.hpu.wrap_in_hpu_graph(
+    return htorch.hpu.wrap_in_hpu_graph(
         HpuModelAdapter(*args, **kwargs), disable_tensor_cache=True
-    ) if htorch.utils.internal.is_lazy() else HpuModelAdapter(*args, **kwargs)'''
-    return HpuModelAdapter(*args, **kwargs)
+    ) if htorch.utils.internal.is_lazy() else HpuModelAdapter(*args, **kwargs)
 
 
 def subtuple(obj: object,
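
The hunk above re-enables lazy-mode HPU graph wrapping instead of always returning a bare adapter. A minimal sketch of the restored behavior, assuming htorch is habana_frameworks.torch and using a stub in place of the real HpuModelAdapter defined in hpu_model_runner.py:

    import habana_frameworks.torch as htorch


    class HpuModelAdapter:
        # Stub standing in for the real adapter class.
        def __init__(self, *args, **kwargs):
            self.args, self.kwargs = args, kwargs


    def _maybe_wrap_in_hpu_graph(*args, **kwargs):
        # In lazy mode, capture the adapter as a replayable HPU graph;
        # disable_tensor_cache=True tells the graph not to cache input and
        # output tensors between replays. In eager mode, return the plain
        # adapter unchanged.
        if htorch.utils.internal.is_lazy():
            return htorch.hpu.wrap_in_hpu_graph(
                HpuModelAdapter(*args, **kwargs), disable_tensor_cache=True)
        return HpuModelAdapter(*args, **kwargs)

The commit's version expresses the same logic as a single conditional expression; the if/return form here is equivalent.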
@@ -619,7 +618,7 @@ def __init__(
         self.use_hpu_graph = not self.model_config.enforce_eager
         self.max_batch_size = self.scheduler_config.max_num_seqs
         self.max_num_seqs = self.scheduler_config.max_num_seqs
-        self.max_prefill_batch_size = 2 # TODO(kzawora): add knob for that
+        self.max_prefill_batch_size = 1 # TODO(kzawora): add knob for that
         self.seen_configs: set = set()
         self.max_num_batched_tokens = \
             self.scheduler_config.max_num_batched_tokens
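
The TODO in this hunk asks for a configuration knob. A sketch of one possible shape for it, with the environment variable name VLLM_MAX_PREFILL_BATCH_SIZE invented here for illustration (it is not an existing flag):

    import os


    def get_max_prefill_batch_size(default: int = 1) -> int:
        # Hypothetical knob: the variable name is an assumption, not part of
        # vllm-gaudi; the default matches the value hard-coded in this commit.
        return int(os.environ.get("VLLM_MAX_PREFILL_BATCH_SIZE", str(default)))

The assignment above would then become self.max_prefill_batch_size = get_max_prefill_batch_size().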
