 skip_if_no_megatron()


-import modelopt.torch.peft as mtpf
+import modelopt.torch.peft as mtpeft
 import modelopt.torch.quantization as mtq
 from modelopt.torch.peft.lora.layer import LoRAModule
 from modelopt.torch.utils.plugins import megatron_prefill
@@ -184,7 +184,7 @@ def _test_forward_with_one_lora(lora_config, rank, size):
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

     original_output = megatron_prefill(model, prompt_tokens)
-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     lora_output = megatron_prefill(model, prompt_tokens)
     assert lora_output.shape == original_output.shape
     if lora_config == DEFAULT_LORA_CFG_RANDOM_INIT_TEST:
@@ -198,12 +198,12 @@ def _test_forward_with_one_lora(lora_config, rank, size):
         assert torch.allclose(lora_output, original_output, rtol=1e-5), (
             f"{lora_output}, {original_output}"
         )
-    mtpf.disable_adapters(model)
+    mtpeft.disable_adapters(model)
     lora_disabled_output = megatron_prefill(model, prompt_tokens)
     # Task: Since all LoRA layers are disabled, the output should
     # be identical to the original output.
     assert torch.allclose(lora_disabled_output, original_output, rtol=1e-5)
-    mtpf.enable_adapters(model)
+    mtpeft.enable_adapters(model)
     lora_reenabled_output = megatron_prefill(model, prompt_tokens)
     # Task: To verify that toggling LoRA layers from disabled
     # to enabled does not alter the output, the output should remain unchanged.
@@ -260,14 +260,14 @@ def _test_forward_with_two_loras(lora_config_1, lora_config_2, rank, size):
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

     original_output = megatron_prefill(model, prompt_tokens)
-    mtpf.update_model(model, lora_config_1)
+    mtpeft.update_model(model, lora_config_1)
     # output from the first lora only
     lora_1_output = megatron_prefill(model, prompt_tokens)

-    mtpf.update_model(model, lora_config_2)
+    mtpeft.update_model(model, lora_config_2)

-    mtpf.disable_adapters(model, adapters_to_disable=[lora_config_1["adapter_name"]])
-    mtpf.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])
+    mtpeft.disable_adapters(model, adapters_to_disable=[lora_config_1["adapter_name"]])
+    mtpeft.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])

     # output from the 2nd lora only
     lora_2_output = megatron_prefill(model, prompt_tokens)
@@ -276,14 +276,14 @@ def _test_forward_with_two_loras(lora_config_1, lora_config_2, rank, size):
     # Should not be the same
     assert not torch.allclose(lora_1_output, lora_2_output)

-    mtpf.enable_adapters(model, adapters_to_enable=[lora_config_1["adapter_name"]])
-    mtpf.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])
+    mtpeft.enable_adapters(model, adapters_to_enable=[lora_config_1["adapter_name"]])
+    mtpeft.enable_adapters(model, adapters_to_enable=[lora_config_2["adapter_name"]])
     lora_all_output = megatron_prefill(model, prompt_tokens)

     assert not torch.allclose(lora_all_output, lora_1_output)
     assert not torch.allclose(lora_all_output, lora_2_output)

-    mtpf.disable_adapters(model)
+    mtpeft.disable_adapters(model)
     both_disabled_output = megatron_prefill(model, prompt_tokens)
     assert torch.allclose(both_disabled_output, original_output)

@@ -318,7 +318,7 @@ def _test_attr_changes_with_one_lora(lora_config, rank, size):
     model = _gpt_model_provider(tp_size=1, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     lora_1_output = megatron_prefill(model, prompt_tokens)

     for _, module in model.named_modules():
@@ -362,7 +362,7 @@ def _test_mcore_save_restore(lora_config, tmp_path, rank, size):
     ).cuda()
     original_output_test = megatron_prefill(model_test, prompt_tokens)

-    mtpf.update_model(model_ref, lora_config)
+    mtpeft.update_model(model_ref, lora_config)

     lora_output_ref = megatron_prefill(model_ref, prompt_tokens)

@@ -401,7 +401,7 @@ def _test_adapter_gradient_flow_freeze_base_model(lora_config, tmp_path, rank, s
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     model.train()

     # Use a simple forward pass instead for grad check
@@ -453,7 +453,7 @@ def _test_adapter_gradient_flow_freeze_lora_model(lora_config, tmp_path, rank, s
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, local_cfg)
+    mtpeft.update_model(model, local_cfg)
     model.train()

     # Use a simple forward pass instead for grad check
@@ -504,7 +504,7 @@ def _test_adapter_gradient_flow(lora_config, tmp_path, rank, size):
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     model.train()

     # Use a simple forward pass instead for grad check
@@ -555,7 +555,7 @@ def forward_func(mod):
     mtq.quantize(model, NVFP4_DEFAULT_CONFIG, forward_func)

     # Then add the lora
-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)

     # Bypass the output layer
     for name, module in model.named_modules():
@@ -601,7 +601,7 @@ def _test_lora_then_quantize(lora_config, tmp_path, rank, size):
     model = _gpt_model_provider(tp_size=size, hidden_size=hidden_size)
     prompt_tokens = torch.randint(0, model.vocab_size, (2, model.max_sequence_length)).cuda()

-    mtpf.update_model(model, lora_config)
+    mtpeft.update_model(model, lora_config)
     lora_output = megatron_prefill(model, prompt_tokens)

     def forward_func(mod):
@@ -671,7 +671,7 @@ def forward_func(mod):
         _ = megatron_prefill(model_ref, prompt_tokens)

     mtq.quantize(model_ref, NVFP4_DEFAULT_CONFIG, forward_func)
-    mtpf.update_model(model_ref, lora_config)
+    mtpeft.update_model(model_ref, lora_config)

     quantize_lora_output_ref = megatron_prefill(model_ref, prompt_tokens)

@@ -731,7 +731,7 @@ def _test_mcore_lora_then_quantize_save_restore(lora_config, tmp_path, rank, siz
     ).cuda()
     original_output_test = megatron_prefill(model_test, prompt_tokens)

-    mtpf.update_model(model_ref, lora_config)
+    mtpeft.update_model(model_ref, lora_config)

     def forward_func(mod):
         _ = megatron_prefill(model_ref, prompt_tokens)
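For reference, a minimal usage sketch of the renamed mtpeft alias as exercised by the tests above. Only the mtpeft.* and megatron_prefill calls come from this diff; build_test_model_and_config() is a hypothetical helper standing in for the module's fixtures (_gpt_model_provider plus a LoRA config such as DEFAULT_LORA_CFG_RANDOM_INIT_TEST).

    # Minimal sketch, not part of the diff: attach, disable, and re-enable a LoRA adapter.
    # build_test_model_and_config() is a hypothetical stand-in for the test fixtures.
    import modelopt.torch.peft as mtpeft
    from modelopt.torch.utils.plugins import megatron_prefill

    model, lora_config, prompt_tokens = build_test_model_and_config()

    mtpeft.update_model(model, lora_config)   # attach the LoRA adapter to the base model
    with_adapter = megatron_prefill(model, prompt_tokens)

    mtpeft.disable_adapters(model)            # output should match the base model again
    without_adapter = megatron_prefill(model, prompt_tokens)

    mtpeft.enable_adapters(model)             # turn all adapters back on
    reenabled = megatron_prefill(model, prompt_tokens)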