Move nlp folder to llm folder (#439)

oyilmaz-nvidia · pablo-garay · web-flow · commit d858650fcd39 · 2025-10-10T08:13:03.000-07:00
Signed-off-by: Onur Yilmaz <oyilmaz@nvidia.com>
Signed-off-by: Pablo Garay <pagaray@nvidia.com>
Co-authored-by: Pablo Garay <pagaray@nvidia.com>
diff --git a/nemo_deploy/deploy_ray.py b/nemo_deploy/deploy_ray.py
@@ -25,8 +25,8 @@
     import ray
     from ray import serve
 
-    from nemo_deploy.nlp.hf_deployable_ray import HFRayDeployable
-    from nemo_deploy.nlp.megatronllm_deployable_ray import MegatronRayDeployable
+    from nemo_deploy.llm.hf_deployable_ray import HFRayDeployable
+    from nemo_deploy.llm.megatronllm_deployable_ray import MegatronRayDeployable
     from nemo_export.tensorrt_llm_deployable_ray import TensorRTLLMRayDeployable
 
     HAVE_RAY = True
diff --git a/nemo_deploy/llm/__init__.py b/nemo_deploy/llm/__init__.py
@@ -21,7 +21,7 @@
     stacklevel=2,
 )
 
-from nemo_deploy.nlp.query_llm import (
+from nemo_deploy.llm.query_llm import (
     NemoQueryLLM,
     NemoQueryLLMHF,
     NemoQueryLLMPyTorch,
diff --git a/nemo_deploy/llm/hf_deployable.py b/nemo_deploy/llm/hf_deployable.py
diff --git a/nemo_deploy/llm/hf_deployable_ray.py b/nemo_deploy/llm/hf_deployable_ray.py
@@ -22,7 +22,7 @@
 from fastapi import FastAPI, HTTPException
 from ray import serve
 
-from nemo_deploy.nlp.hf_deployable import HuggingFaceLLMDeploy
+from nemo_deploy.llm.hf_deployable import HuggingFaceLLMDeploy
 from nemo_deploy.ray_utils import find_available_port
 
 LOGGER = logging.getLogger("NeMo")
diff --git a/nemo_deploy/llm/inference/__init__.py b/nemo_deploy/llm/inference/__init__.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from nemo_deploy.llm.inference.inference_base import (
+    create_mcore_engine,
+    setup_megatron_model_and_tokenizer_for_inference,
+    setup_model_and_tokenizer_for_inference,
+)
+from nemo_deploy.llm.inference.tron_utils import DistributedInitConfig, RNGConfig
+
+__all__ = [
+    "create_mcore_engine",
+    "setup_model_and_tokenizer_for_inference",
+    "setup_megatron_model_and_tokenizer_for_inference",
+    "DistributedInitConfig",
+    "RNGConfig",
+]
diff --git a/nemo_deploy/llm/inference/inference_base.py b/nemo_deploy/llm/inference/inference_base.py
diff --git a/nemo_deploy/llm/inference/tron_utils.py b/nemo_deploy/llm/inference/tron_utils.py
diff --git a/nemo_deploy/llm/megatronllm_deployable.py b/nemo_deploy/llm/megatronllm_deployable.py
@@ -25,7 +25,7 @@
 from megatron.core.inference.inference_request import InferenceRequest
 
 from nemo_deploy import ITritonDeployable
-from nemo_deploy.nlp.inference.inference_base import create_mcore_engine
+from nemo_deploy.llm.inference.inference_base import create_mcore_engine
 from nemo_deploy.utils import (
     NEMO2,
     broadcast_list,
diff --git a/nemo_deploy/llm/megatronllm_deployable_ray.py b/nemo_deploy/llm/megatronllm_deployable_ray.py
diff --git a/nemo_deploy/llm/query_llm.py b/nemo_deploy/llm/query_llm.py
diff --git a/nemo_deploy/llm/trtllm_api_deployable.py b/nemo_deploy/llm/trtllm_api_deployable.py
diff --git a/nemo_deploy/service/fastapi_interface_to_pytriton.py b/nemo_deploy/service/fastapi_interface_to_pytriton.py
@@ -17,7 +17,7 @@
 from pydantic import BaseModel, model_validator
 from pydantic_settings import BaseSettings
 
-from nemo_deploy.nlp import NemoQueryLLMPyTorch
+from nemo_deploy.llm import NemoQueryLLMPyTorch
 
 try:
     from nemo.utils import logging
diff --git a/pyproject.toml b/pyproject.toml
@@ -179,7 +179,6 @@ select = [
     "F401", # imported but unused
     "E741", # ambiguous variable name
     "F821", # undefined name
-    "E266", # too many leading '#' for block comment
     "I",    # isort
     "D101", # docstring
     "D103",
@@ -195,7 +194,7 @@ ignore = [
 convention = "google"
 
 # Section to exclude errors for different file types
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 # Ignore all directories named `tests`.
 "tests/**" = ["D"]
 # Ignore all files that end in `_test.py`.
diff --git a/scripts/deploy/nlp/benchmark_llm_inframework.py b/scripts/deploy/nlp/benchmark_llm_inframework.py
@@ -19,7 +19,7 @@
 
 import numpy as np
 
-from nemo_deploy.nlp import NemoQueryLLMPyTorch
+from nemo_deploy.llm import NemoQueryLLMPyTorch
 
 # Test prompts for benchmarking
 TEST_PROMPTS = [
diff --git a/scripts/deploy/nlp/deploy_inframework_hf_triton.py b/scripts/deploy/nlp/deploy_inframework_hf_triton.py
@@ -22,7 +22,7 @@
 import torch.distributed as dist
 
 from nemo_deploy import DeployPyTriton
-from nemo_deploy.nlp.hf_deployable import HuggingFaceLLMDeploy
+from nemo_deploy.llm.hf_deployable import HuggingFaceLLMDeploy
 
 LOGGER = logging.getLogger("NeMo")
 
diff --git a/scripts/deploy/nlp/deploy_inframework_triton.py b/scripts/deploy/nlp/deploy_inframework_triton.py
@@ -31,7 +31,7 @@
 
 megatron_llm_supported = True
 try:
-    from nemo_deploy.nlp.megatronllm_deployable import MegatronLLMDeployableNemo2
+    from nemo_deploy.llm.megatronllm_deployable import MegatronLLMDeployableNemo2
 except Exception as e:
     LOGGER.warning(f"Cannot import MegatronLLMDeployable, it will not be available. {type(e).__name__}: {e}")
     megatron_llm_supported = False
diff --git a/scripts/deploy/nlp/deploy_triton.py b/scripts/deploy/nlp/deploy_triton.py
@@ -33,7 +33,7 @@ class UsageError(Exception):
 
 megatron_llm_supported = True
 try:
-    from nemo_deploy.nlp.megatronllm_deployable import MegatronLLMDeployable
+    from nemo_deploy.llm.megatronllm_deployable import MegatronLLMDeployable
 except Exception as e:
     LOGGER.warning(f"Cannot import MegatronLLMDeployable, it will not be available. {type(e).__name__}: {e}")
     megatron_llm_supported = False
diff --git a/scripts/deploy/nlp/deploy_trtllm_api_triton.py b/scripts/deploy/nlp/deploy_trtllm_api_triton.py
@@ -16,7 +16,7 @@
 import logging
 
 from nemo_deploy import DeployPyTriton
-from nemo_deploy.nlp.trtllm_api_deployable import TensorRTLLMAPIDeployable
+from nemo_deploy.llm.trtllm_api_deployable import TensorRTLLMAPIDeployable
 
 LOGGER = logging.getLogger("NeMo")
 
diff --git a/scripts/deploy/nlp/query_inframework.py b/scripts/deploy/nlp/query_inframework.py
@@ -17,7 +17,7 @@
 import sys
 import time
 
-from nemo_deploy.nlp import NemoQueryLLMPyTorch
+from nemo_deploy.llm import NemoQueryLLMPyTorch
 
 LOGGER = logging.getLogger("NeMo")
 
diff --git a/scripts/deploy/nlp/query_inframework_hf.py b/scripts/deploy/nlp/query_inframework_hf.py
@@ -15,7 +15,7 @@
 import argparse
 import sys
 
-from nemo_deploy.nlp import NemoQueryLLMHF
+from nemo_deploy.llm import NemoQueryLLMHF
 
 
 def get_args(argv):
diff --git a/scripts/deploy/nlp/query_trtllm_api.py b/scripts/deploy/nlp/query_trtllm_api.py
@@ -14,7 +14,7 @@
 
 import argparse
 
-from nemo_deploy.nlp import NemoQueryTRTLLMAPI
+from nemo_deploy.llm import NemoQueryTRTLLMAPI
 
 
 def get_args():
diff --git a/scripts/deploy/nlp/query_vllm.py b/scripts/deploy/nlp/query_vllm.py
@@ -18,7 +18,7 @@
 
 import numpy as np
 
-from nemo_deploy.nlp import NemoQueryvLLM
+from nemo_deploy.llm import NemoQueryvLLM
 
 
 def get_args(argv):
diff --git a/tests/functional_tests/utils/run_nemo_deploy.py b/tests/functional_tests/utils/run_nemo_deploy.py
@@ -21,14 +21,14 @@
 
 import torch
 
-from nemo_deploy.nlp.megatronllm_deployable import MegatronLLMDeployableNemo2
+from nemo_deploy.llm.megatronllm_deployable import MegatronLLMDeployableNemo2
 
 run_export_tests = True
 try:
     from nemo_deploy import DeployPyTriton
-    from nemo_deploy.nlp import NemoQueryLLM, NemoQueryLLMPyTorch
+    from nemo_deploy.llm import NemoQueryLLM, NemoQueryLLMPyTorch
     from nemo_export.tensorrt_llm import TensorRTLLM
-except Exception:
+except Exception:  # noqa: BLE001
     run_export_tests = False
 
 
diff --git a/tests/functional_tests/utils/run_nemo_export.py b/tests/functional_tests/utils/run_nemo_export.py
@@ -28,7 +28,7 @@
 triton_supported = True
 try:
     from nemo_deploy import DeployPyTriton
-    from nemo_deploy.nlp import NemoQueryLLM, NemoQueryvLLM
+    from nemo_deploy.llm import NemoQueryLLM, NemoQueryvLLM
 except Exception as e:
     LOGGER.warning(f"Cannot import Triton, deployment will not be available. {type(e).__name__}: {e}")
     triton_supported = False
@@ -37,8 +37,8 @@
 try:
     from megatron.core.inference.common_inference_params import CommonInferenceParams
 
-    from nemo_deploy.nlp import NemoQueryLLMPyTorch
-    from nemo_deploy.nlp.megatronllm_deployable import (
+    from nemo_deploy.llm import NemoQueryLLMPyTorch
+    from nemo_deploy.llm.megatronllm_deployable import (
         MegatronLLMDeploy,
         MegatronLLMDeployableNemo2,
     )
diff --git a/tests/functional_tests/utils/run_trtllm_api_deploy_query.py b/tests/functional_tests/utils/run_trtllm_api_deploy_query.py
@@ -15,8 +15,8 @@
 import argparse
 
 from nemo_deploy import DeployPyTriton
-from nemo_deploy.nlp import NemoQueryTRTLLMAPI
-from nemo_deploy.nlp.trtllm_api_deployable import TensorRTLLMAPIDeployable
+from nemo_deploy.llm import NemoQueryTRTLLMAPI
+from nemo_deploy.llm.trtllm_api_deployable import TensorRTLLMAPIDeployable
 
 
 def get_args():
diff --git a/tests/unit_tests/deploy/test_deploy_query.py b/tests/unit_tests/deploy/test_deploy_query.py
@@ -18,7 +18,7 @@
 from pytriton.model_config import Tensor
 
 from nemo_deploy import DeployPyTriton, ITritonDeployable
-from nemo_deploy.nlp import NemoQueryLLM
+from nemo_deploy.llm import NemoQueryLLM
 from nemo_deploy.utils import cast_output, str_ndarray2list
 
 
diff --git a/tests/unit_tests/deploy/test_hf_deployable.py b/tests/unit_tests/deploy/test_hf_deployable.py
@@ -20,7 +20,7 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from nemo_deploy.nlp.hf_deployable import HuggingFaceLLMDeploy
+from nemo_deploy.llm.hf_deployable import HuggingFaceLLMDeploy
 
 
 @pytest.fixture
@@ -48,7 +48,7 @@ def mock_tokenizer():
 
 @pytest.fixture
 def mock_peft_model():
-    with patch("nemo_deploy.nlp.hf_deployable.PeftModel") as mock:
+    with patch("nemo_deploy.llm.hf_deployable.PeftModel") as mock:
         mock.from_pretrained.return_value = MagicMock()
         yield mock
 
diff --git a/tests/unit_tests/deploy/test_hf_deployable_ray.py b/tests/unit_tests/deploy/test_hf_deployable_ray.py
@@ -19,7 +19,7 @@
 import pytest
 from fastapi import FastAPI, HTTPException
 
-from nemo_deploy.nlp.hf_deployable import HuggingFaceLLMDeploy
+from nemo_deploy.llm.hf_deployable import HuggingFaceLLMDeploy
 
 
 # Create a mock of the HFRayDeployable class without decorators for testing
@@ -55,7 +55,7 @@ def _setup_unique_distributed_parameters(self, device_map):
 # Mock fixtures to simulate dependencies
 @pytest.fixture
 def mock_hf_model():
-    with patch("nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy") as mock:
+    with patch("nemo_deploy.llm.hf_deployable.HuggingFaceLLMDeploy") as mock:
         mock_instance = MagicMock(spec=HuggingFaceLLMDeploy)
         mock_instance.ray_infer_fn = MagicMock()
         mock_instance.ray_infer_fn.return_value = {
@@ -68,7 +68,7 @@ def mock_hf_model():
 
 @pytest.fixture
 def mock_ray():
-    with patch("nemo_deploy.nlp.hf_deployable_ray.serve") as mock_serve:
+    with patch("nemo_deploy.llm.hf_deployable_ray.serve") as mock_serve:
         # Mock Ray serve to expose the actual class
         mock_deployment = MagicMock()
         mock_deployment.return_value = lambda x: x  # Return the class itself
@@ -81,13 +81,13 @@ def mock_ray():
 @pytest.fixture
 def mock_hfray_class():
     # Use our custom mock class for testing
-    with patch("nemo_deploy.nlp.hf_deployable_ray.HFRayDeployable", MockHFRayDeployable):
+    with patch("nemo_deploy.llm.hf_deployable_ray.HFRayDeployable", MockHFRayDeployable):
         yield MockHFRayDeployable
 
 
 @pytest.fixture
 def mock_fastapi():
-    with patch("nemo_deploy.nlp.hf_deployable_ray.FastAPI") as mock:
+    with patch("nemo_deploy.llm.hf_deployable_ray.FastAPI") as mock:
         mock.return_value = MagicMock(spec=FastAPI)
         yield mock
 
diff --git a/tests/unit_tests/deploy/test_hf_import.py b/tests/unit_tests/deploy/test_hf_import.py
@@ -20,7 +20,7 @@
 import torch.distributed as dist
 import torch.multiprocessing as mp
 
-from nemo_deploy.nlp.hf_deployable import HuggingFaceLLMDeploy
+from nemo_deploy.llm.hf_deployable import HuggingFaceLLMDeploy
 from nemo_deploy.utils import broadcast_list
 
 
diff --git a/tests/unit_tests/deploy/test_inference_base.py b/tests/unit_tests/deploy/test_inference_base.py
diff --git a/tests/unit_tests/deploy/test_megatron_deployable_ray.py b/tests/unit_tests/deploy/test_megatron_deployable_ray.py
diff --git a/tests/unit_tests/deploy/test_megatronllm_deployable.py b/tests/unit_tests/deploy/test_megatronllm_deployable.py
diff --git a/tests/unit_tests/deploy/test_query_llm.py b/tests/unit_tests/deploy/test_query_llm.py
diff --git a/tests/unit_tests/deploy/test_trtllm_api_deployable.py b/tests/unit_tests/deploy/test_trtllm_api_deployable.py

Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@`
`21`	`21`	`stacklevel=2,`
`22`	`22`	`)`
`23`	`23`
`24`		`-from nemo_deploy.nlp.query_llm import (`
	`24`	`+from nemo_deploy.llm.query_llm import (`
`25`	`25`	`NemoQueryLLM,`
`26`	`26`	`NemoQueryLLMHF,`
`27`	`27`	`NemoQueryLLMPyTorch,`