
Commit 15a6d2a

Support dynamic lora updating (#472)
1 parent 8e1adfd commit 15a6d2a

File tree (3 files changed, +5 −0):

tests/common/vllm_test.py
trinity/common/config.py
trinity/common/models/vllm_model.py

tests/common/vllm_test.py

Lines changed: 2 additions & 0 deletions
@@ -1228,6 +1228,8 @@ def setUp(self):
         self.config.explorer.rollout_model.tensor_parallel_size = 1
         self.config.explorer.rollout_model.chat_template = CHAT_TEMPLATE
         self.config.explorer.rollout_model.enable_openai_api = True
+        self.config.explorer.rollout_model.enable_lora = True
+        self.config.explorer.rollout_model.enable_runtime_lora_updating = True
 
         self.config.check_and_update()
         self.engines, self.auxiliary_engines = create_inference_models(self.config)
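For context, here is a minimal sketch of what the two new test options amount to when set programmatically. It assumes InferenceModelConfig (from trinity/common/config.py, diffed below) is a plain dataclass whose other fields all have defaults; note the config comment below says enable_lora is normally derived from model.lora_configs, so setting it by hand is for illustration only.

```python
# Hedged sketch: mirrors the flags the test enables above.
from trinity.common.config import InferenceModelConfig

model_config = InferenceModelConfig()
model_config.enable_openai_api = True             # serve an OpenAI-compatible API
model_config.enable_lora = True                   # normally auto-set from model.lora_configs
model_config.enable_runtime_lora_updating = True  # new flag introduced in this commit
```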

trinity/common/config.py

Lines changed: 1 addition & 0 deletions
@@ -556,6 +556,7 @@ class InferenceModelConfig:
 
     # ! DO NOT SET, automatically set from model.lora_configs
     enable_lora: bool = False
+    enable_runtime_lora_updating: bool = False
    lora_modules: Optional[List[Dict]] = None
    lora_kwargs: Optional[dict] = field(default_factory=dict)
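The new flag sits beside the existing LoRA fields of the config. A hedged illustration of how they might be populated together; the field names come from the diff above, but the dictionary contents are placeholders, not values taken from this commit.

```python
# Hypothetical values; the lora_modules/lora_kwargs formats are assumptions.
from trinity.common.config import InferenceModelConfig

cfg = InferenceModelConfig()
cfg.enable_runtime_lora_updating = True
cfg.lora_modules = [{"name": "demo_adapter", "path": "/ckpts/demo_adapter"}]  # placeholder adapter
cfg.lora_kwargs = {"max_lora_rank": 16}  # assumed pass-through to the engine's LoRA settings
```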

trinity/common/models/vllm_model.py

Lines changed: 2 additions & 0 deletions
@@ -54,6 +54,8 @@ def __init__(
         os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
         if self.vllm_version >= parse_version("0.11.0"):
             os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0"
+        if self.config.enable_runtime_lora_updating:
+            os.environ["VLLM_ALLOW_RUNTIME_LORA_UPDATING"] = "1"
         if not config.enforce_eager:
             # To avoid torch compile conflicts when multiple model are started simultaneously.
             # remove this when the following PR is released:
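Setting VLLM_ALLOW_RUNTIME_LORA_UPDATING=1 is what lets vLLM's OpenAI-compatible server load and unload LoRA adapters at runtime through its /v1/load_lora_adapter and /v1/unload_lora_adapter endpoints. A minimal sketch of driving those endpoints follows; the server URL, adapter name, and path are placeholders, not values from this commit.

```python
# Hot-swap a LoRA adapter on a running vLLM OpenAI-compatible server.
import requests

BASE = "http://localhost:8000"  # placeholder: wherever the rollout model serves its API

# Load a new adapter without restarting the engine.
resp = requests.post(
    f"{BASE}/v1/load_lora_adapter",
    json={"lora_name": "demo_adapter", "lora_path": "/ckpts/demo_adapter"},
)
resp.raise_for_status()

# Unload it when no longer needed.
requests.post(f"{BASE}/v1/unload_lora_adapter", json={"lora_name": "demo_adapter"})
```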
