 skip_if_no_megatron()


-import modelopt.torch.peft as mtpf
+import modelopt.torch.peft as mtpeft
 import modelopt.torch.quantization as mtq
 from modelopt.torch.peft.lora.layer import LoRAModule
 from modelopt.torch.utils.plugins import megatron_prefill
@@ -184,7 +184,7 @@ def _test_forward_with_one_lora(lora_config, rank, size):
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

     original_output = megatron_prefill(model, prompt_tokens)
-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     lora_output = megatron_prefill(model, prompt_tokens)
     assert lora_output.shape == original_output.shape
     if lora_config == DEFAULT_LORA_CFG_RANDOM_INIT_TEST:
@@ -198,12 +198,12 @@ def _test_forward_with_one_lora(lora_config, rank, size):
         assert torch.allclose(lora_output, original_output, rtol=1e-5), (
             f"{lora_output}, {original_output}"
         )
-    mtpf.disable_adapters(model)
+    mtpeft.disable_adapters(model)
     lora_disabled_output = megatron_prefill(model, prompt_tokens)
     # Task: Since all LoRA layers are disabled, the output should
     # be identical to the original output.
     assert torch.allclose(lora_disabled_output, original_output, rtol=1e-5)
-    mtpf.enable_adapters(model)
+    mtpeft.enable_adapters(model)
     lora_reenabled_output = megatron_prefill(model, prompt_tokens)
     # Task: To verify that toggling LoRA layers from disabled
     # to enabled does not alter the output, the output should remain unchanged.
@@ -260,14 +260,14 @@ def _test_forward_with_two_loras(lora_config_1, lora_config_2, rank, size):
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

     original_output = megatron_prefill(model, prompt_tokens)
-    mtpf.update_model(model, lora_config_1)
+    mtpeft.update_model(model, lora_config_1)
     # output from the first lora only
     lora_1_output = megatron_prefill(model, prompt_tokens)

-    mtpf.update_model(model, lora_config_2)
+    mtpeft.update_model(model, lora_config_2)

-    mtpf.disable_adapters(model, adapters_to_disable=[lora_config_1["adapter_name"]])
-    mtpf.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])
+    mtpeft.disable_adapters(model, adapters_to_disable=[lora_config_1["adapter_name"]])
+    mtpeft.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])

     # output from the 2nd lora only
     lora_2_output = megatron_prefill(model, prompt_tokens)
@@ -276,14 +276,14 @@ def _test_forward_with_two_loras(lora_config_1, lora_config_2, rank, size):
     # Should not be the same
     assert not torch.allclose(lora_1_output, lora_2_output)

-    mtpf.enable_adapters(model, adapters_to_enable=[lora_config_1["adapter_name"]])
-    mtpf.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])
+    mtpeft.enable_adapters(model, adapters_to_enable=[lora_config_1["adapter_name"]])
+    mtpeft.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])
     lora_all_output = megatron_prefill(model, prompt_tokens)

     assert not torch.allclose(lora_all_output, lora_1_output)
     assert not torch.allclose(lora_all_output, lora_2_output)

-    mtpf.disable_adapters(model)
+    mtpeft.disable_adapters(model)
     both_disabled_output = megatron_prefill(model, prompt_tokens)
     assert torch.allclose(both_disabled_output, original_output)

@@ -318,7 +318,7 @@ def _test_attr_changes_with_one_lora(lora_config, rank, size):
     model = _gpt_model_provider(tp_size=1, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     lora_1_output = megatron_prefill(model, prompt_tokens)

     for _, module in model.named_modules():
@@ -362,7 +362,7 @@ def _test_mcore_save_restore(lora_config, tmp_path, rank, size):
     ).cuda()
     original_output_test = megatron_prefill(model_test, prompt_tokens)

-    mtpf.update_model(model_ref, lora_config)
+    mtpeft.update_model(model_ref, lora_config)

     lora_output_ref = megatron_prefill(model_ref, prompt_tokens)

@@ -401,7 +401,7 @@ def _test_adapter_gradient_flow_freeze_base_model(lora_config, tmp_path, rank, size):
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     model.train()

     # Use a simple forward pass instead for grad check
@@ -453,7 +453,7 @@ def _test_adapter_gradient_flow_freeze_lora_model(lora_config, tmp_path, rank, size):
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, local_cfg)
+    mtpeft.update_model(model, local_cfg)
     model.train()

     # Use a simple forward pass instead for grad check
@@ -504,7 +504,7 @@ def _test_adapter_gradient_flow(lora_config, tmp_path, rank, size):
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
    prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     model.train()

     # Use a simple forward pass instead for grad check
@@ -555,7 +555,7 @@ def forward_func(mod):
     mtq.quantize(model, NVFP4_DEFAULT_CONFIG, forward_func)

     # Then add the lora
-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)

     # Bypass the output layer
     for name, module in model.named_modules():
@@ -601,7 +601,7 @@ def _test_lora_then_quantize(lora_config, tmp_path, rank, size):
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     lora_output = megatron_prefill(model, prompt_tokens)

     def forward_func(mod):
@@ -671,7 +671,7 @@ def forward_func(mod):
         _ = megatron_prefill(model_ref, prompt_tokens)

     mtq.quantize(model_ref, NVFP4_DEFAULT_CONFIG, forward_func)
-    mtpf.update_model(model_ref, lora_config)
+    mtpeft.update_model(model_ref, lora_config)

     quantize_lora_output_ref = megatron_prefill(model_ref, prompt_tokens)

@@ -731,7 +731,7 @@ def _test_mcore_lora_then_quantize_save_restore(lora_config, tmp_path, rank, size):
     ).cuda()
     original_output_test = megatron_prefill(model_test, prompt_tokens)

-    mtpf.update_model(model_ref, lora_config)
+    mtpeft.update_model(model_ref, lora_config)

     def forward_func(mod):
         _ = megatron_prefill(model_ref, prompt_tokens)
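For reference, a minimal sketch (not part of this diff) of the adapter workflow these tests exercise under the renamed alias. `get_model()` and the `lora_cfg_*` dicts are hypothetical stand-ins for the test's `_gpt_model_provider` helper and LoRA config fixtures; the `mtpeft` calls mirror those used in the hunks above.

import modelopt.torch.peft as mtpeft

model = get_model()  # hypothetical stand-in for _gpt_model_provider(...)

# Attach two LoRA adapters to the base model.
mtpeft.update_model(model, lora_cfg_1)
mtpeft.update_model(model, lora_cfg_2)

# Keep only the second adapter active.
mtpeft.disable_adapters(model, adapters_to_disable=[lora_cfg_1["adapter_name"]])
mtpeft.enable_adapters(model, adapters_to_enable=[lora_cfg_2["adapter_name"]])

# Turn all adapters off; a forward pass should then match the base model output.
mtpeft.disable_adapters(model)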