Update logprobs_mode definition

tongyuantongyu · tongyuantongyu · commit 348a54713225 · 2026-01-02T14:31:38.000+08:00
Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
diff --git a/tensorrt_llm/sampling_params.py b/tensorrt_llm/sampling_params.py
@@ -2,7 +2,7 @@
 import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field, fields
-from typing import List, Literal, NamedTuple, Optional, Tuple, Union
+from typing import List, NamedTuple, Optional, Tuple, Union
 
 import torch
 from pydantic import BaseModel
@@ -49,7 +49,14 @@ class LogprobParams(NamedTuple):
 
 class LogprobMode(StrEnum):
     RAW = "raw"
+    """
+    Return the raw log probabilities, i.e., the log probabilities calculated directly from the model output logits.
+    """
     PROCESSED = "processed"
+    """
+    Return the processed log probabilities, i.e., the log probabilities after applying sampling parameters,
+    such as temperature, top-k, top-p, etc.
+    """
 
 
 class LogitsProcessor(ABC):
@@ -180,7 +187,7 @@ class SamplingParams:
 
         logprobs (int, optional): Number of log probabilities to return per output token. When set to 0, return only the sampled token's log probability.
                                   When set to K>0, return top-K log probabilities + the sampled token's log probability (last entry) if it's not in the Top-K. Defaults to None.
-        logprobs_mode (Literal["raw", "processed"]): The mode of log probabilities to return. Valid modes are "raw" and "processed". Defaults to "raw".
+        logprobs_mode (LogprobMode, optional): The mode of log probabilities to return. Defaults to RAW.
         prompt_logprobs (int, optional): Number of log probabilities to return per prompt token. Defaults to None.
         return_context_logits (bool): Controls if Result should contain the context logits. Defaults to False.
         return_generation_logits (bool): Controls if Result should contain the generation logits. Defaults to False.
@@ -227,7 +234,7 @@ class SamplingParams:
     n: int = 1
     best_of: Optional[int] = None
     use_beam_search: bool = False
-    logprobs_mode: Literal["raw", "processed"] = "raw"
+    logprobs_mode: LogprobMode = LogprobMode.RAW
 
     # Keep the below fields in sync with tllme.SamplingConfig or maintin the mapping table.
     top_k: Optional[int] = None
@@ -330,11 +337,7 @@ def _validate(self):
                 f"under the greedy decoding."
             )
 
-        if self.logprobs_mode not in [LogprobMode.RAW, LogprobMode.PROCESSED]:
-            raise ValueError(
-                f"logprobs_mode must be one of {LogprobMode.RAW.value}, {LogprobMode.PROCESSED.value}. "
-                f"Got {self.logprobs_mode} instead."
-            )
+        self.logprobs_mode = LogprobMode(self.logprobs_mode)
 
         if self.truncate_prompt_tokens is not None and self.truncate_prompt_tokens < 1:
             raise ValueError(