Skip to content

Commit 4849c70

Browse files
ko3n1g and chtruong814 authored
ci: Refactor tests (part 2) (#167)
Signed-off-by: oliver könig <okoenig@nvidia.com>
Signed-off-by: Charlie Truong <chtruong@nvidia.com>
Co-authored-by: Charlie Truong <chtruong@nvidia.com>
1 parent 0becc99 commit 4849c70

16 files changed

+374
-153
lines changed

.github/workflows/cicd-main.yml

Lines changed: 40 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -143,15 +143,15 @@ jobs:
143143
run: |
144144
echo "Running CI tests"
145145
146-
cicd-unit-tests:
146+
cicd-unit-tests-trtllm:
147147
strategy:
148148
fail-fast: false
149149
matrix:
150150
include:
151-
- script: L0_Unit_Tests_GPU_Export_Deploy
151+
- script: L0_Unit_Tests_GPU
152152
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
153153
timeout: 30
154-
- script: L0_Unit_Tests_CPU_Export_Deploy
154+
- script: L0_Unit_Tests_CPU
155155
runner: linux-amd64-cpu16
156156
cpu-only: true
157157
needs: [pre-flight, cicd-wait-in-queue]
@@ -175,6 +175,38 @@ jobs:
175175
PAT: ${{ secrets.PAT }}
176176
inference-framework: trtllm
177177

178+
cicd-unit-tests-vllm:
179+
strategy:
180+
fail-fast: false
181+
matrix:
182+
include:
183+
- script: L0_Unit_Tests_GPU
184+
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
185+
timeout: 30
186+
- script: L0_Unit_Tests_CPU
187+
runner: linux-amd64-cpu16
188+
cpu-only: true
189+
needs: [pre-flight, cicd-wait-in-queue]
190+
runs-on: ${{ matrix.runner }}
191+
name: ${{ matrix.script }}
192+
environment: nemo-ci
193+
steps:
194+
- name: Checkout
195+
uses: actions/checkout@v4
196+
- name: main
197+
uses: ./.github/actions/test-template
198+
with:
199+
script: ${{ matrix.script }}
200+
timeout: ${{ matrix.timeout || 10 }}
201+
is_unit_test: "true"
202+
cpu-only: ${{ matrix.cpu-only || false }}
203+
has-azure-credentials: "true"
204+
azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
205+
azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
206+
azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
207+
PAT: ${{ secrets.PAT }}
208+
inference-framework: vllm
209+
178210
cicd-e2e-tests-trtllm:
179211
strategy:
180212
fail-fast: false
@@ -183,23 +215,15 @@ jobs:
183215
- script: L2_Launch_TRTLLM
184216
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
185217
timeout: 60
186-
- script: L2_NeMo_2_Export_Deploy_Query_Ray
187-
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
188-
- script: L2_NeMo_2_Export_Deploy_Query_TRTLLM_Ray
189-
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
190218
- script: L2_ONNX_TRT_LLM_Embedding_Export
191219
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
192220
- script: L2_ONNX_TRT_LLM_Embedding_Export_INT8
193221
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
194-
- script: L2_NeMo_2_Export_TRT_LLM
195-
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
196-
- script: L2_NeMo_2_Export_In_Framework
197-
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
198222
- script: L2_NeMo_2_Export_Qnemo_TRT_LLM
199223
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
200224
- script: L2_TRTLLM_API_Deploy_Query
201225
runner: linux-amd64-gpu-rtxa6000-latest-2-nemo
202-
needs: [cicd-unit-tests]
226+
needs: [cicd-unit-tests-trtllm]
203227
runs-on: ${{ matrix.runner }}
204228
name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
205229
environment: nemo-ci
@@ -221,7 +245,7 @@ jobs:
221245
inference-framework: trtllm
222246

223247
cicd-e2e-tests-vllm:
224-
needs: [cicd-unit-tests]
248+
needs: [cicd-unit-tests-vllm]
225249
runs-on: linux-amd64-gpu-rtxa6000-latest-2-nemo
226250
name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
227251
environment: nemo-ci
@@ -243,7 +267,7 @@ jobs:
243267
inference-framework: vllm
244268

245269
cicd-e2e-tests-inframework:
246-
needs: [cicd-unit-tests]
270+
needs: [cicd-unit-tests-trtllm, cicd-unit-tests-vllm]
247271
runs-on: linux-amd64-gpu-rtxa6000-latest-2-nemo
248272
name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
249273
environment: nemo-ci
@@ -267,7 +291,8 @@ jobs:
267291
Nemo_CICD_Test:
268292
needs:
269293
- pre-flight
270-
- cicd-unit-tests
294+
- cicd-unit-tests-trtllm
295+
- cicd-unit-tests-vllm
271296
- cicd-e2e-tests-trtllm
272297
- cicd-e2e-tests-vllm
273298
if: always()

nemo_deploy/nlp/trtllm_api_deployable.py

Lines changed: 30 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,15 +17,33 @@
1717
from typing import List, Optional, Union
1818

1919
import numpy as np
20-
from pytriton.decorators import batch, first_value
21-
from pytriton.model_config import Tensor
22-
from tensorrt_llm import SamplingParams
23-
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
24-
from tensorrt_llm.llmapi.llm import LLM, TokenizerBase
2520
from transformers import PreTrainedTokenizerBase
2621

2722
from nemo_deploy import ITritonDeployable
2823
from nemo_deploy.utils import cast_output, str_ndarray2list
24+
from nemo_export_deploy_common.import_utils import MISSING_TENSORRT_LLM_MSG, MISSING_TRITON_MSG, null_decorator
25+
26+
try:
27+
from pytriton.decorators import batch, first_value
28+
from pytriton.model_config import Tensor
29+
30+
HAVE_TRITON = True
31+
except ImportError:
32+
from unittest.mock import MagicMock
33+
34+
Tensor = MagicMock()
35+
batch = null_decorator
36+
first_value = null_decorator
37+
HAVE_TRITON = False
38+
39+
try:
40+
from tensorrt_llm import SamplingParams
41+
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
42+
from tensorrt_llm.llmapi.llm import LLM, TokenizerBase
43+
44+
HAVE_TENSORRT_LLM = True
45+
except ImportError:
46+
HAVE_TENSORRT_LLM = False
2947

3048
LOGGER = logging.getLogger("NeMo")
3149

@@ -55,7 +73,7 @@ class TensorRTLLMAPIDeployable(ITritonDeployable):
5573
def __init__(
5674
self,
5775
hf_model_id_path: str,
58-
tokenizer: Optional[Union[str, Path, TokenizerBase, PreTrainedTokenizerBase]] = None,
76+
tokenizer: Optional[Union[str, Path, "TokenizerBase", PreTrainedTokenizerBase]] = None,
5977
tensor_parallel_size: int = 1,
6078
pipeline_parallel_size: int = 1,
6179
moe_expert_parallel_size: int = -1,
@@ -66,6 +84,12 @@ def __init__(
6684
dtype: str = "auto",
6785
**kwargs,
6886
):
87+
if not HAVE_TENSORRT_LLM:
88+
raise ImportError(MISSING_TENSORRT_LLM_MSG)
89+
90+
if not HAVE_TRITON:
91+
raise ImportError(MISSING_TRITON_MSG)
92+
6993
config_args = {k: kwargs.pop(k) for k in PyTorchConfig.__annotations__.keys() & kwargs.keys()}
7094
pytorch_config = PyTorchConfig(**config_args)
7195

tests/functional_tests/L2_NeMo_2_Export_Deploy_Query_Ray.sh

Lines changed: 0 additions & 23 deletions
This file was deleted.

tests/functional_tests/L2_NeMo_2_Export_Deploy_Query_TRTLLM_Ray.sh

Lines changed: 0 additions & 25 deletions
This file was deleted.

tests/functional_tests/L2_NeMo_2_Export_In_Framework.sh

Lines changed: 0 additions & 34 deletions
This file was deleted.

tests/functional_tests/L2_NeMo_2_Export_TRT_LLM.sh

Lines changed: 0 additions & 26 deletions
This file was deleted.
(New file added by this commit; its path was not captured in this extract.)

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import logging
16+
import shutil
17+
import subprocess
18+
import tempfile
19+
20+
logging.basicConfig(level=logging.INFO)
21+
logger = logging.getLogger(__name__)
22+
23+
24+
class TestInFrameworkExport:
    """Functional test that drives the in-framework export script end to end.

    ``setup_class`` runs ``tests/functional_tests/utils/test_hf_import.py`` for
    ``meta-llama/Llama-3.2-1B`` and writes its output to ``<testdir>/nemo2_ckpt``.
    The test then runs ``tests/functional_tests/utils/run_nemo_export.py``
    against that directory. Every script is launched through ``coverage run``
    with ``check=True``, so a non-zero exit status raises
    ``subprocess.CalledProcessError`` and fails the test.
    """

    # Shared ``coverage run`` prefix for every script launched by this class.
    _COVERAGE_CMD = (
        "coverage",
        "run",
        "--data-file=/workspace/.coverage",
        "--source=/workspace/",
        "--parallel-mode",
    )

    @classmethod
    def setup_class(cls):
        """Create the scratch directory and populate ``<testdir>/nemo2_ckpt``.

        Raises:
            subprocess.CalledProcessError: If the import script exits non-zero.
        """
        # Create output directories
        cls.testdir = tempfile.mkdtemp()
        logger.info(f"Test directory: {cls.testdir}")

        # Update HF model
        try:
            subprocess.run(
                [
                    *cls._COVERAGE_CMD,
                    "tests/functional_tests/utils/test_hf_import.py",
                    "--hf_model",
                    "meta-llama/Llama-3.2-1B",
                    "--output_path",
                    f"{cls.testdir}/nemo2_ckpt",
                    "--config",
                    "Llama32Config1B",
                ],
                check=True,
            )
        except Exception:
            # pytest does not call teardown_class when setup_class raises, so
            # remove the scratch directory here to avoid leaking it.
            shutil.rmtree(cls.testdir, ignore_errors=True)
            raise

    @classmethod
    def teardown_class(cls):
        """Delete the scratch directory created by ``setup_class``."""
        logger.info(f"Removing test directory: {cls.testdir}")
        shutil.rmtree(cls.testdir)

    def test_inframework_export(self):
        """Run ``run_nemo_export.py`` with in-framework export, deployment and accuracy flags.

        Raises:
            subprocess.CalledProcessError: If the export script exits non-zero.
        """
        subprocess.run(
            [
                *self._COVERAGE_CMD,
                "tests/functional_tests/utils/run_nemo_export.py",
                "--model_name",
                "test",
                "--model_type",
                "llama",
                "--checkpoint_dir",
                f"{self.testdir}/nemo2_ckpt",
                "--min_tps",
                "1",
                "--in_framework",
                "True",
                "--test_deployment",
                "True",
                "--run_accuracy",
                "True",
                "--test_data_path",
                "tests/functional_tests/data/lambada.json",
                "--accuracy_threshold",
                "0.0",
                "--debug",
            ],
            check=True,
        )

0 commit comments

Comments
 (0)