|
23 | 23 | GPTInferenceWrapper, |
24 | 24 | ) |
25 | 25 | from megatron.core.transformer.module import MegatronModule |
26 | | -from nemo.collections.llm.gpt.model.base import GPTConfig |
27 | | -from nemo.collections.llm.inference.base import MCoreTokenizerWrappper |
| 26 | + |
| 27 | +try: |
| 28 | + from nemo.collections.llm.gpt.model.base import GPTConfig |
| 29 | + from nemo.collections.llm.inference.base import MCoreTokenizerWrappper |
| 30 | + |
| 31 | + HAVE_NEMO = True |
| 32 | +except (ImportError, ModuleNotFoundError): |
| 33 | + HAVE_NEMO = False |
28 | 34 |
|
29 | 35 | from nemo_deploy.llm.inference.inference_base import ( |
30 | 36 | MCoreEngineWithCleanup, |
|
41 | 47 | from nemo_export_deploy_common.import_utils import UnavailableError |
42 | 48 |
|
43 | 49 |
|
| 50 | +@pytest.mark.skipif(not HAVE_NEMO, reason="NeMo is not installed") |
44 | 51 | @pytest.mark.run_only_on("GPU") |
45 | 52 | class TestInferenceBase(unittest.TestCase): |
46 | 53 | def setUp(self): |
@@ -291,99 +298,6 @@ def test_setup_model_calls_configure_model( |
291 | 298 | # Verify that configure_model(tokenizer) was invoked |
292 | 299 | self.mock_model.configure_model.assert_called_once_with(self.mock_tokenizer) |
293 | 300 |
|
294 | | - @patch("nemo_deploy.llm.inference.inference_base.HAVE_NEMO", True) |
295 | | - @patch("nemo_deploy.llm.inference.inference_base.calculate_padded_vocab_size") |
296 | | - @patch("nemo_deploy.llm.inference.inference_base.GPTInferenceWrapper") |
297 | | - @patch("nemo_deploy.llm.inference.inference_base.TextGenerationController") |
298 | | - @patch("nemo_deploy.llm.inference.inference_base.MCoreEngine") |
299 | | - @patch("nemo_deploy.llm.inference.inference_base.StaticInferenceContext") |
300 | | - @patch("nemo_deploy.llm.inference.inference_base.setup_megatron_model_and_tokenizer_for_inference") |
301 | | - def test_create_mcore_engine_megatron_with_mlm_args( |
302 | | - self, |
303 | | - mock_setup_meg, |
304 | | - mock_static_ctx, |
305 | | - mock_engine_class, |
306 | | - mock_tg_ctrl_class, |
307 | | - mock_gpt_wrapper_class, |
308 | | - mock_calc_pad_vocab, |
309 | | - ): |
310 | | - # Prepare model.config used by InferenceWrapperConfig |
311 | | - mock_model = MagicMock() |
312 | | - mock_model.config = MagicMock() |
313 | | - mock_model.config.hidden_size = 256 |
314 | | - mock_model.config.vocab_size = 32000 |
315 | | - mock_model.config.make_vocab_size_divisible_by = 128 |
316 | | - mock_model.config.tensor_model_parallel_size = 1 |
317 | | - |
318 | | - mock_tokenizer = MagicMock() |
319 | | - |
320 | | - # mlm_args with explicit padded_vocab_size |
321 | | - mlm_args = MagicMock() |
322 | | - mlm_args.padded_vocab_size = 1234 |
323 | | - |
324 | | - mock_setup_meg.return_value = ([mock_model], mock_tokenizer, mlm_args) |
325 | | - mock_static_ctx.from_config.return_value = MagicMock() |
326 | | - |
327 | | - from nemo_deploy.llm.inference.inference_base import create_mcore_engine |
328 | | - |
329 | | - create_mcore_engine(path=self.mock_path, model_format="megatron") |
330 | | - |
331 | | - # Ensure we did NOT compute padded vocab when mlm_args provides it |
332 | | - mock_calc_pad_vocab.assert_not_called() |
333 | | - |
334 | | - # Validate padded_vocab_size flowed into GPTInferenceWrapper config |
335 | | - args, kwargs = mock_gpt_wrapper_class.call_args |
336 | | - inference_wrapper_config = args[1] |
337 | | - self.assertEqual(inference_wrapper_config.padded_vocab_size, 1234) |
338 | | - self.assertEqual(inference_wrapper_config.hidden_size, 256) |
339 | | - |
340 | | - @patch("nemo_deploy.llm.inference.inference_base.HAVE_NEMO", True) |
341 | | - @patch("nemo_deploy.llm.inference.inference_base.calculate_padded_vocab_size") |
342 | | - @patch("nemo_deploy.llm.inference.inference_base.GPTInferenceWrapper") |
343 | | - @patch("nemo_deploy.llm.inference.inference_base.TextGenerationController") |
344 | | - @patch("nemo_deploy.llm.inference.inference_base.MCoreEngine") |
345 | | - @patch("nemo_deploy.llm.inference.inference_base.StaticInferenceContext") |
346 | | - @patch("nemo_deploy.llm.inference.inference_base.setup_megatron_model_and_tokenizer_for_inference") |
347 | | - def test_create_mcore_engine_megatron_without_mlm_args_uses_calculated_padded_vocab( |
348 | | - self, |
349 | | - mock_setup_meg, |
350 | | - mock_static_ctx, |
351 | | - mock_engine_class, |
352 | | - mock_tg_ctrl_class, |
353 | | - mock_gpt_wrapper_class, |
354 | | - mock_calc_pad_vocab, |
355 | | - ): |
356 | | - # Prepare model.config used by InferenceWrapperConfig and pad calculation |
357 | | - mock_model = MagicMock() |
358 | | - mock_model.config = MagicMock() |
359 | | - mock_model.config.hidden_size = 512 |
360 | | - mock_model.config.vocab_size = 30000 |
361 | | - mock_model.config.make_vocab_size_divisible_by = 128 |
362 | | - mock_model.config.tensor_model_parallel_size = 2 |
363 | | - |
364 | | - mock_tokenizer = MagicMock() |
365 | | - |
366 | | - mock_setup_meg.return_value = ([mock_model], mock_tokenizer, None) |
367 | | - mock_static_ctx.from_config.return_value = MagicMock() |
368 | | - mock_calc_pad_vocab.return_value = 24576 |
369 | | - |
370 | | - from nemo_deploy.llm.inference.inference_base import create_mcore_engine |
371 | | - |
372 | | - create_mcore_engine(path=self.mock_path, model_format="megatron") |
373 | | - |
374 | | - # Ensure padded vocab was computed with expected args |
375 | | - mock_calc_pad_vocab.assert_called_once_with( |
376 | | - mock_model.config.vocab_size, |
377 | | - mock_model.config.make_vocab_size_divisible_by, |
378 | | - mock_model.config.tensor_model_parallel_size, |
379 | | - ) |
380 | | - |
381 | | - # Validate padded_vocab_size flowed into GPTInferenceWrapper config |
382 | | - args, kwargs = mock_gpt_wrapper_class.call_args |
383 | | - inference_wrapper_config = args[1] |
384 | | - self.assertEqual(inference_wrapper_config.padded_vocab_size, 24576) |
385 | | - self.assertEqual(inference_wrapper_config.hidden_size, 512) |
386 | | - |
387 | 301 | @patch("nemo_deploy.llm.inference.inference_base.check_is_distributed_checkpoint") |
388 | 302 | @patch("nemo_deploy.llm.inference.inference_base.ckpt_to_weights_subdir") |
389 | 303 | @patch("nemo_deploy.llm.inference.inference_base.ckpt_to_context_subdir") |
|
0 commit comments