Commit 8950223

[fix] Remove SpecConfig and fix thread leak issues (NVIDIA#5931)
Signed-off-by: Mike Iovine <[email protected]>
1 parent: bc1d4fb

5 files changed: +15 -24 lines changed


tensorrt_llm/_torch/speculative/__init__.py

Lines changed: 1 addition & 2 deletions

@@ -1,5 +1,5 @@
 from .eagle3 import Eagle3SpecMetadata
-from .interface import SpecConfig, SpecMetadata
+from .interface import SpecMetadata
 from .mtp import MTPEagleWorker, MTPSpecMetadata, MTPWorker
 from .ngram import NGramDrafter, NGramPoolManager
 from .utils import (get_num_spec_layers, get_spec_decoder, get_spec_drafter,
@@ -13,7 +13,6 @@
     "MTPWorker",
     "NGramDrafter",
     "NGramPoolManager",
-    "SpecConfig",
     "SpecMetadata",
     "get_num_spec_layers",
     "get_spec_decoder",

tensorrt_llm/_torch/speculative/interface.py

Lines changed: 0 additions & 11 deletions

@@ -105,17 +105,6 @@ def from_string(name: Optional[str]) -> "SpeculativeDecodingMode":
         return SpeculativeDecodingMode[name.upper()]
 
 
-@dataclass
-class SpecConfig:
-    """
-    Configuration for speculative decoding.
-    This class is deprecated, but thread-leak of pytest raises flaky error if removing it.
-    TODO: remove this class safely.
-    """
-    # The name of speculative decoding.
-    spec_dec_name = None
-
-
 @dataclass
 class SpecMetadata:
     """

tensorrt_llm/llmapi/llm_args.py

Lines changed: 6 additions & 6 deletions

@@ -389,15 +389,15 @@ def supports_backend(self, backend: str) -> bool:
 
 
 class MTPDecodingConfig(DecodingBaseConfig):
-    num_nextn_predict_layers: Optional[int] = 1
-    use_relaxed_acceptance_for_thinking: Optional[bool] = False
-    relaxed_topk: Optional[int] = 1
-    relaxed_delta: Optional[float] = 0.
-    use_mtp_vanilla: Optional[bool] = False
+    num_nextn_predict_layers: int = 1
+    use_relaxed_acceptance_for_thinking: bool = False
+    relaxed_topk: int = 1
+    relaxed_delta: float = 0.
+    use_mtp_vanilla: bool = False
 
     # TODO: remove this after distinguishing `max_draft_len` and `num_nextn_predict_layers`
     # Now we need a flag when MTPDecodingConfig is updated by PyTorchModelEngine.
-    num_nextn_predict_layers_from_model_config: Optional[int] = 1
+    num_nextn_predict_layers_from_model_config: int = 1
 
     # TODO: Hard code for DeepSeek R1
     # When encounter <think>, start thinking phase.
tests/integration/defs/__init__.py

Lines changed: 8 additions & 0 deletions

@@ -12,3 +12,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# This import inexplicably starts a thread!
+# This causes problems for our test infra. The issue is that TRTLLM will import
+# this module. If the import happens before the test starts, there are no problems.
+# But if the import happens lazily after the test starts, pytest will think you leaked
+# the thread. We thus do the import here to prevent thread leak issues cropping up when messing
+# with the import statements in tests.
+from torch._inductor import lowering  # NOQA
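
To see why the eager import helps, here is a hedged illustration of the failure mode, not the actual test-infra code: a leak checker snapshots live threads around a test, so a thread started by a lazy import inside the test window is reported as a leak.

import threading

def run_with_leak_check(test_fn):
    # Snapshot threads before and after the test, the way a pytest
    # thread-leak plugin conceptually works (hypothetical checker).
    before = set(threading.enumerate())
    test_fn()
    leaked = set(threading.enumerate()) - before
    if leaked:
        raise RuntimeError(f"leaked threads: {[t.name for t in leaked]}")

def test_body():
    # If this is the first import of torch._inductor.lowering in the
    # process, it may start a background thread inside the test window.
    from torch._inductor import lowering  # noqa: F401

run_with_leak_check(test_body)

Importing the module at package-import time, as this commit does in tests/integration/defs/__init__.py, moves any thread start before the "before" snapshot, so the checker no longer flags it.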

tests/integration/defs/accuracy/accuracy_core.py

Lines changed: 0 additions & 5 deletions

@@ -25,7 +25,6 @@
 import tensorrt_llm.evaluate
 from tensorrt_llm import LLM as PyTorchLLM
 from tensorrt_llm._tensorrt_engine import LLM
-from tensorrt_llm._torch.speculative import SpecConfig
 from tensorrt_llm.builder import BuildConfig
 from tensorrt_llm.llmapi import SamplingParams
 from tensorrt_llm.llmapi.llm_args import DecodingBaseConfig
@@ -156,10 +155,6 @@ def evaluate(self,
             spec_dec_algo = None
         elif isinstance(llm.args.speculative_config, DecodingBaseConfig):
             spec_dec_algo = llm.args.speculative_config.decoding_type
-        elif isinstance(llm.args.speculative_config, SpecConfig):
-            # This branch is deprecated, but thread-leak of pytest raises flaky error if removing it.
-            # TODO: remove this branch safely.
-            spec_dec_algo = llm.args.speculative_config.spec_dec_name
         else:
             raise ValueError(
                 f"Not recognized speculative_config: {llm.args.speculative_config}."