niukuo
diff --git a/‎tensorrt_llm/_torch/llm.py‎
Lines changed: 2 additions & 4 deletions b/‎tensorrt_llm/_torch/llm.py‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎tensorrt_llm/llmapi/llm.py‎
Lines changed: 0 additions & 2 deletions b/‎tensorrt_llm/llmapi/llm.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎tensorrt_llm/llmapi/llm_args.py‎
Lines changed: 4 additions & 4 deletions b/‎tensorrt_llm/llmapi/llm_args.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎tests/unittest/api_stability/api_stability_core.py‎
Lines changed: 66 additions & 26 deletions b/‎tests/unittest/api_stability/api_stability_core.py‎
Lines changed: 66 additions & 26 deletions
diff --git a/‎tests/unittest/api_stability/references/batched_logits_processor.yaml‎
Lines changed: 0 additions & 6 deletions b/‎tests/unittest/api_stability/references/batched_logits_processor.yaml‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎tests/unittest/api_stability/references/calib_config.yaml‎
Lines changed: 0 additions & 11 deletions b/‎tests/unittest/api_stability/references/calib_config.yaml‎
Lines changed: 0 additions & 11 deletions
@@ -17,7 +17,6 @@ def __init__(self,
                  skip_tokenizer_init: bool = False,
                  trust_remote_code: bool = False,
                  tensor_parallel_size: int = 1,
-                 pipeline_parallel_size: int = 1,
                  dtype: str = "auto",
                  revision: Optional[str] = None,
                  tokenizer_revision: Optional[str] = None,
@@ -26,6 +25,5 @@ def __init__(self,
         kwargs_dict = dict(kwargs)
         kwargs_dict['backend'] = 'pytorch'
         super().__init__(model, tokenizer, tokenizer_mode, skip_tokenizer_init,
-                         trust_remote_code, tensor_parallel_size,
-                         pipeline_parallel_size, dtype, revision,
-                         tokenizer_revision, **kwargs_dict)
+                         trust_remote_code, tensor_parallel_size, dtype,
+                         revision, tokenizer_revision, **kwargs_dict)
@@ -98,7 +98,6 @@ def __init__(self,
                  skip_tokenizer_init: bool = False,
                  trust_remote_code: bool = False,
                  tensor_parallel_size: int = 1,
-                 pipeline_parallel_size: int = 1,
                  dtype: str = "auto",
                  revision: Optional[str] = None,
                  tokenizer_revision: Optional[str] = None,
@@ -116,7 +115,6 @@ def __init__(self,
                 skip_tokenizer_init=skip_tokenizer_init,
                 trust_remote_code=trust_remote_code,
                 tensor_parallel_size=tensor_parallel_size,
-                pipeline_parallel_size=pipeline_parallel_size,
                 dtype=dtype,
                 revision=revision,
                 tokenizer_revision=tokenizer_revision,
 
@@ -650,8 +650,6 @@ def model_name(self) -> Union[str, Path]:
 
         tensor_parallel_size(int): The number of processes for tensor parallelism. Defaults to 1.
 
-        pipeline_parallel_size(int): The number of processes for pipeline parallelism. Defaults to 1.
-
         dtype (str): The data type for the model weights and activations. Defaults to "auto".
             Can be "float16", "bfloat16", "float32", or "auto". If "auto", the data type will be automatically inferred from the source model.
             If the source data type is "float32", it will be converted to "float16".
@@ -662,6 +660,8 @@ def model_name(self) -> Union[str, Path]:
 """
 
 LLMARGS_IMPLICIT_DOCSTRING = """
+        pipeline_parallel_size(int): The number of processes for pipeline parallelism. Defaults to 1.
+
         context_parallel_size (int): The context parallel size. Defaults to 1.
 
         gpus_per_node (int, optional): The number of GPUs per node. None means automatic configure. Defaults to None.
@@ -769,15 +769,15 @@ class LlmArgs:
 
     tensor_parallel_size: int = 1
 
-    pipeline_parallel_size: int = 1
-
     dtype: str = "auto"
 
     revision: Optional[str] = None
 
     tokenizer_revision: Optional[str] = None
 
     # Below are all remaining arguments
+    pipeline_parallel_size: int = 1
+
     context_parallel_size: int = 1
 
     gpus_per_node: Optional[int] = None
 
@@ -1,4 +1,5 @@
 # autoflake: skip_file
+import copy
 import inspect
 import os
 import pathlib
@@ -28,13 +29,12 @@ def repr_annotation(field_type: type) -> str:
 
 @dataclass(slots=True)
 class ParamSnapshot:
-    name: str
     annotation: type
     default: Any = None
 
     @classmethod
     def from_inspect(cls, param: inspect.Parameter):
-        return cls(param.name, param.annotation, param.default)
+        return cls(param.annotation, param.default)
 
     @classmethod
     def from_docstring(cls, param: docstring_parser.common.DocstringParam):
@@ -57,7 +57,7 @@ def from_docstring(cls, param: docstring_parser.common.DocstringParam):
             except (NameError, SyntaxError):
                 default = param.default
 
-        return cls(param.arg_name, annotation, default)
+        return cls(annotation, default)
 
     @classmethod
     def from_dict(cls, d: dict):
@@ -77,19 +77,17 @@ def to_dict(self):
         return d
 
     def assert_equal(self, other: 'ParamSnapshot'):
-        assert self.name == other.name
         assert self.annotation == other.annotation
         assert self.default == other.default
 
 
 @dataclass(slots=True)
 class MethodSnapshot:
-    name: str
     parameters: Dict[str, ParamSnapshot]
     return_annotation: type
 
     @classmethod
-    def from_inspect(cls, name: str, method: MethodType):
+    def from_inspect(cls, method: MethodType):
         signature = inspect.signature(method)
         parameters = {}
         for param_name, param in signature.parameters.items():
@@ -99,10 +97,10 @@ def from_inspect(cls, name: str, method: MethodType):
         return_annotation = signature.return_annotation
         if isinstance(return_annotation, str):
             return_annotation = eval(return_annotation)
-        return cls(name, parameters, return_annotation)
+        return cls(parameters, return_annotation)
 
     @classmethod
-    def from_docstring(cls, name: str, method: MethodType):
+    def from_docstring(cls, method: MethodType):
         doc = docstring_parser.parse(method.__doc__)
         parameters = {}
         for param in doc.params:
@@ -112,7 +110,7 @@ def from_docstring(cls, name: str, method: MethodType):
             return_annotation = None
         else:
             return_annotation = eval(doc.returns.type_name)
-        return cls(name, parameters, return_annotation)
+        return cls(parameters, return_annotation)
 
     @classmethod
     def from_dict(cls, d: dict):
@@ -132,13 +130,23 @@ def to_dict(self):
         d['return_annotation'] = repr_annotation(d['return_annotation'])
         return d
 
+    def merge(self, other: 'MethodSnapshot'):
+        assert self.parameters.keys().isdisjoint(other.parameters.keys())
+        self.parameters.update(copy.deepcopy(other.parameters))
+        assert self.return_annotation == other.return_annotation
+
     def assert_equal(self, other: 'MethodSnapshot'):
-        assert self.name == other.name
         assert self.parameters.keys() == other.parameters.keys()
         for name, param in self.parameters.items():
             param.assert_equal(other.parameters[name])
         assert self.return_annotation == other.return_annotation
 
+    def assert_containing(self, other: 'MethodSnapshot'):
+        for name, param in other.parameters.items():
+            assert name in self.parameters
+            self.parameters[name].assert_equal(param)
+        assert self.return_annotation == other.return_annotation
+
 
 @dataclass(slots=True)
 class ClassSnapshot:
@@ -153,16 +161,14 @@ def from_inspect(cls, snapshot_cls: type):
                 inst, predicate=inspect.ismethod):
             if method_name.startswith("_") and method_name != "__init__":
                 continue
-            methods[method_name] = MethodSnapshot.from_inspect(
-                method_name, method)
+            methods[method_name] = MethodSnapshot.from_inspect(method)
         properties = {}
         for prop_name, prop in inspect.getmembers(
                 snapshot_cls, predicate=lambda x: isinstance(x, property)):
             if prop_name.startswith("_"):
                 continue
             annotation = inspect.signature(prop.fget).return_annotation
-            properties[prop_name] = ParamSnapshot(prop_name, annotation,
-                                                  inspect._empty)
+            properties[prop_name] = ParamSnapshot(annotation, inspect._empty)
         return cls(methods, properties)
 
     @classmethod
@@ -175,10 +181,9 @@ def from_docstring(cls, snapshot_cls: type):
                 continue
             if method_name == "__init__":
                 methods["__init__"] = MethodSnapshot.from_docstring(
-                    "__init__", snapshot_cls)
+                    snapshot_cls)
             else:
-                methods[method_name] = MethodSnapshot.from_docstring(
-                    method_name, method)
+                methods[method_name] = MethodSnapshot.from_docstring(method)
         properties = {}
         doc = docstring_parser.parse(snapshot_cls.__doc__)
         for param in doc.params:
@@ -210,6 +215,19 @@ def to_dict(self):
         }
         return d
 
+    def merge(self, other: 'ClassSnapshot'):
+        for name, method in self.methods.items():
+            if name in other.methods:
+                method.merge(other.methods[name])
+        new_methods = {
+            name: method
+            for name, method in other.methods.items()
+            if name not in self.methods
+        }
+        self.methods.update(copy.deepcopy(new_methods))
+        assert self.properties.keys().isdisjoint(other.properties.keys())
+        self.properties.update(copy.deepcopy(other.properties))
+
     def assert_equal(self, other: 'ClassSnapshot'):
         assert self.methods.keys() == other.methods.keys()
         for name, method in self.methods.items():
@@ -218,30 +236,47 @@ def assert_equal(self, other: 'ClassSnapshot'):
         for name, prop in self.properties.items():
             prop.assert_equal(other.properties[name])
 
+    def assert_containing(self, other: 'ClassSnapshot'):
+        for name, method in other.methods.items():
+            assert name in self.methods
+            self.methods[name].assert_containing(method)
+        for name, prop in other.properties.items():
+            assert name in self.properties
+            self.properties[name].assert_equal(prop)
+
 
 class ApiStabilityTestHarness:
     TEST_CLASS = None
+    REFERENCE_COMMITTED_DIR = f"{os.path.dirname(__file__)}/references_committed"
     REFERENCE_DIR = f"{os.path.dirname(__file__)}/references"
     REFERENCE_FILE = None
 
-    @classmethod
-    def reference_path(cls):
-        return f"{cls.REFERENCE_DIR}/{cls.REFERENCE_FILE}"
-
     @classmethod
     def setup_class(cls):
-        with open(cls.reference_path()) as f:
+        with open(f"{cls.REFERENCE_DIR}/{cls.REFERENCE_FILE}") as f:
             cls.reference = ClassSnapshot.from_dict(yaml.safe_load(f))
-        cls.error_msg = (
-            f"API stability validation failed. "
-            f"This is probably because you changed {cls.TEST_CLASS.__name__}'s APIs, please ask for reviews from the code owners."
-        )
+        if os.path.exists(
+                f"{cls.REFERENCE_COMMITTED_DIR}/{cls.REFERENCE_FILE}"):
+            with open(
+                    f"{cls.REFERENCE_COMMITTED_DIR}/{cls.REFERENCE_FILE}") as f:
+                cls.reference_committed = ClassSnapshot.from_dict(
+                    yaml.safe_load(f))
+            cls.reference.merge(cls.reference_committed)
+        else:
+            cls.reference_committed = None
+        cls.error_msg = f"API validation failed because you changed {cls.TEST_CLASS.__name__}'s APIs, please ask for reviews from the code owners."
+        cls.error_msg_committed = f"API validation failed because you changed {cls.TEST_CLASS.__name__}'s committed APIs, please ask for approval."
 
     def create_snapshot_from_inspect(self):
         return ClassSnapshot.from_inspect(self.TEST_CLASS)
 
     def test_signature(self):
         snapshot = self.create_snapshot_from_inspect()
+        if self.reference_committed is not None:
+            try:
+                snapshot.assert_containing(self.reference_committed)
+            except AssertionError as e:
+                raise AssertionError(self.error_msg_committed) from e
         try:
             snapshot.assert_equal(self.reference)
         except AssertionError as e:
@@ -252,6 +287,11 @@ def create_snapshot_from_docstring(self):
 
     def test_docstring(self):
         snapshot = self.create_snapshot_from_docstring()
+        if self.reference_committed is not None:
+            try:
+                snapshot.assert_containing(self.reference_committed)
+            except AssertionError as e:
+                raise AssertionError(self.error_msg_committed) from e
         try:
             snapshot.assert_equal(self.reference)
         except AssertionError as e:
 
@@ -1,26 +1,20 @@
 methods:
   __call__:
-    name: __call__
     parameters:
       client_ids:
         annotation: List[Optional[int]]
         default: inspect._empty
-        name: client_ids
       logits:
         annotation: List[torch.Tensor]
         default: inspect._empty
-        name: logits
       req_ids:
         annotation: List[int]
         default: inspect._empty
-        name: req_ids
       stream_ptr:
         annotation: int
         default: inspect._empty
-        name: stream_ptr
       token_ids:
         annotation: List[List[List[int]]]
         default: inspect._empty
-        name: token_ids
     return_annotation: None
 properties: {}
@@ -1,46 +1,35 @@
 methods:
   __init__:
-    name: __init__
     parameters:
       calib_batch_size:
         annotation: int
         default: 1
-        name: calib_batch_size
       calib_batches:
         annotation: int
         default: 512
-        name: calib_batches
       calib_dataset:
         annotation: str
         default: cnn_dailymail
-        name: calib_dataset
       calib_max_seq_length:
         annotation: int
         default: 512
-        name: calib_max_seq_length
       device:
         annotation: Literal['cuda', 'cpu']
         default: cuda
-        name: device
       random_seed:
         annotation: int
         default: 1234
-        name: random_seed
       tokenizer_max_seq_length:
         annotation: int
         default: 2048
-        name: tokenizer_max_seq_length
     return_annotation: None
   from_dict:
-    name: from_dict
     parameters:
       config:
         annotation: dict
         default: inspect._empty
-        name: config
     return_annotation: tensorrt_llm.llmapi.llm_utils.CalibConfig
   to_dict:
-    name: to_dict
     parameters: {}
     return_annotation: dict
 properties: {}