
Commit 24e2e78

tgerdesnv authored and mc-nv committed
Support for quick search of models that don't support batching (#506)
* initial no batching support for quick search
* fix issue where dynamic batching should not be enabled
* fix quick search for no batching
* Rework no batching support
* more cleanup
* more cleanup
* remove needless comment
* fix type checking
* Clean up and document new class
* add more type checking
1 parent b7b1b78 commit 24e2e78

15 files changed: +267 −167 lines

model_analyzer/config/generate/automatic_model_config_generator.py

Lines changed: 12 additions & 20 deletions
@@ -15,23 +15,25 @@
 from .base_model_config_generator import BaseModelConfigGenerator
 
 from model_analyzer.constants import LOGGER_NAME, DEFAULT_CONFIG_PARAMS
-import logging
 from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+from .model_profile_spec import ModelProfileSpec
+import logging
 
 
 logger = logging.getLogger(LOGGER_NAME)
 
 
 class AutomaticModelConfigGenerator(BaseModelConfigGenerator):
     """ Given a model, generates model configs in automatic search mode """
 
-    def __init__(self, config, gpus, model, client, model_variant_name_manager,
-                 default_only, early_exit_enable):
+    def __init__(self, config, gpus, model: ModelProfileSpec, client,
+                 model_variant_name_manager, default_only, early_exit_enable):
         """
         Parameters
         ----------
         config: ModelAnalyzerConfig
         gpus: List of GPUDevices
-        model: The model to generate ModelConfigs for
+        model: ModelProfileSpec
+            The model to generate ModelConfigs for
         client: TritonClient
         model_variant_name_manager: ModelVariantNameManager
         default_only: Bool
@@ -54,9 +56,6 @@ def __init__(self, config, gpus, model, client, model_variant_name_manager,
         self._curr_instance_count = self._min_instance_count
         self._curr_max_batch_size = 0
 
-        self._max_batch_size_disabled = self._determine_max_batch_size_disabled(
-        )
-
         self._reset_max_batch_size()
 
         if not self._early_exit_enable:
@@ -102,10 +101,10 @@ def _max_batch_size_limit_reached(self):
     def _reset_max_batch_size(self):
         super()._reset_max_batch_size()
 
-        if self._max_batch_size_disabled:
-            self._curr_max_batch_size = self._max_model_batch_size
-        else:
+        if self._base_model.supports_batching():
             self._curr_max_batch_size = self._min_model_batch_size
+        else:
+            self._curr_max_batch_size = self._max_model_batch_size
 
     def _get_next_model_config(self):
         param_combo = self._get_curr_param_combo()
@@ -123,17 +122,10 @@ def _get_curr_param_combo(self):
             }]
         }
 
-        if not self._max_batch_size_disabled:
+        if self._base_model.supports_batching():
             config['max_batch_size'] = self._curr_max_batch_size
+
+        if self._base_model.supports_dynamic_batching():
             config['dynamic_batching'] = {}
 
         return config
-
-    def _determine_max_batch_size_disabled(self):
-        config = BaseModelConfigGenerator.get_base_model_config_dict(
-            self._config, self._client, self._gpus, self._model_repository,
-            self._base_model_name)
-        max_batch_size_disabled = False
-        if "max_batch_size" not in config or config['max_batch_size'] == 0:
-            max_batch_size_disabled = True
-        return max_batch_size_disabled
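
Net effect of this file's change: the batching-related keys in the generated param combo are now gated on the model's default config rather than on a separately computed _max_batch_size_disabled flag. A minimal runnable sketch of that behavior, using a hypothetical stub in place of ModelProfileSpec (the instance_group shape here is illustrative, not taken from the commit):

# Hypothetical stand-in for ModelProfileSpec, mimicking the two predicates
# the diff above introduces.
class StubModel:

    def __init__(self, default_config: dict):
        self._cfg = default_config

    def supports_batching(self) -> bool:
        return self._cfg.get("max_batch_size", 0) != 0

    def supports_dynamic_batching(self) -> bool:
        return self.supports_batching() and "sequence_batching" not in self._cfg


def build_param_combo(model: StubModel, instance_count: int,
                      max_batch_size: int) -> dict:
    # Mirrors the post-commit _get_curr_param_combo(): batching keys are
    # emitted only when the model's default config allows them.
    combo = {"instance_group": [{"count": instance_count, "kind": "KIND_GPU"}]}
    if model.supports_batching():
        combo["max_batch_size"] = max_batch_size
    if model.supports_dynamic_batching():
        combo["dynamic_batching"] = {}
    return combo


# A model whose config.pbtxt sets max_batch_size: 0 gets neither batching key.
print(build_param_combo(StubModel({"max_batch_size": 0}), 1, 8))
# -> {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}
print(build_param_combo(StubModel({"max_batch_size": 4}), 1, 8))
# -> adds both 'max_batch_size' and 'dynamic_batching'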

model_analyzer/config/generate/base_model_config_generator.py

Lines changed: 13 additions & 41 deletions
@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 from .config_generator_interface import ConfigGeneratorInterface
-
+from typing import List, Optional
 from model_analyzer.constants import LOGGER_NAME
 from model_analyzer.triton.model.model_config import ModelConfig
-
+from .model_profile_spec import ModelProfileSpec
 import abc
 import logging
 
@@ -26,14 +27,15 @@
 class BaseModelConfigGenerator(ConfigGeneratorInterface):
     """ Base class for generating model configs """
 
-    def __init__(self, config, gpus, model, client, model_variant_name_manager,
-                 default_only, early_exit_enable):
+    def __init__(self, config, gpus, model: ModelProfileSpec, client,
+                 model_variant_name_manager, default_only, early_exit_enable):
         """
         Parameters
         ----------
         config: ModelAnalyzerConfig
         gpus: List of GPUDevices
-        model: The model to generate ModelConfigs for
+        model: ModelProfileSpec
+            The model to generate ModelConfigs for
         client: TritonClient
         model_variant_name_manager: ModelVariantNameManager
         default_only: Bool
@@ -43,10 +45,8 @@ def __init__(self, config, gpus, model, client, model_variant_name_manager,
         If true, the generator can early exit if throughput plateaus
         """
         self._config = config
-        self._gpus = gpus
         self._client = client
         self._model_variant_name_manager = model_variant_name_manager
-        self._model_repository = config.model_repository
         self._base_model = model
         self._base_model_name = model.model_name()
         self._remote_mode = config.triton_launch_mode == 'remote'
@@ -56,11 +56,11 @@ def __init__(self, config, gpus, model, client, model_variant_name_manager,
         self._model_name_index = 0
         self._generator_started = False
         self._max_batch_size_warning_printed = False
-        self._last_results = []
+        self._last_results: List[RunConfigMeasurement] = []
         # Contains the max throughput from each provided list of measurements
         # since the last time we stepped max_batch_size
         #
-        self._curr_max_batch_size_throughputs = []
+        self._curr_max_batch_size_throughputs: List[float] = []
 
     def _is_done(self):
         """ Returns true if this generator is done generating configs """
@@ -119,7 +119,7 @@ def _last_results_increased_throughput(self):
             lastest_throughput > prev_throughput
             for prev_throughput in self._curr_max_batch_size_throughputs[:-1])
 
-    def _get_last_results_max_throughput(self):
+    def _get_last_results_max_throughput(self) -> Optional[float]:
         throughputs = [
             m.get_non_gpu_metric_value('perf_throughput')
             for m in self._last_results
@@ -144,16 +144,12 @@ def _make_remote_model_config(self):
     def _make_direct_mode_model_config(self, param_combo):
         return BaseModelConfigGenerator.make_model_config(
             param_combo=param_combo,
-            config=self._config,
-            client=self._client,
-            gpus=self._gpus,
             model=self._base_model,
-            model_repository=self._model_repository,
             model_variant_name_manager=self._model_variant_name_manager)
 
     @staticmethod
-    def make_model_config(param_combo, config, client, gpus, model,
-                          model_repository, model_variant_name_manager):
+    def make_model_config(param_combo, model: ModelProfileSpec,
+                          model_variant_name_manager):
         """
         Loads the base model config from the model repository, and then applies the
         parameters in the param_combo on top to create and return a new model config
@@ -173,8 +169,7 @@ def make_model_config(param_combo, config, client, gpus, model,
         """
         model_name = model.model_name()
 
-        model_config_dict = BaseModelConfigGenerator.get_base_model_config_dict(
-            config, client, gpus, model_repository, model_name)
+        model_config_dict = model.get_default_config()
 
         logger_str = []
         if param_combo is not None:
@@ -208,29 +203,6 @@ def make_model_config(param_combo, config, client, gpus, model,
 
         return model_config
 
-    @classmethod
-    def get_base_model_config_dict(cls, config, client, gpus, model_repository,
-                                   model_name):
-        """
-        Attempts to create a base model config dict from config.pbtxt, if one exists
-        If the config.pbtxt is not present, we will load a Triton Server with the
-        base model and have it create a default config for MA, if possible
-
-        Parameters:
-        -----------
-        config: ModelAnalyzerConfig
-        client: TritonClient
-        gpus: List of GPUDevices
-        model_repository: str
-            path to the model repository on the file system
-        model_name: str
-            name of the base model
-        """
-        model_config_dict = ModelConfig.create_model_config_dict(
-            config, client, gpus, model_repository, model_name)
-
-        return model_config_dict
-
     def _reset_max_batch_size(self):
         self._max_batch_size_warning_printed = False
         self._curr_max_batch_size_throughputs = []
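
make_model_config now starts from model.get_default_config() and applies the param_combo on top of the returned dict. Since that dict gets mutated, it matters that the spec hands out a copy each time. A runnable mini-version of the idea (MiniSpec is a hypothetical stand-in, not the commit's class):

from copy import deepcopy

class MiniSpec:

    def __init__(self, default_model_config: dict):
        self._default_model_config = default_model_config

    def get_default_config(self) -> dict:
        # Return a deepcopy so callers can overlay params without
        # corrupting the cached default config.
        return deepcopy(self._default_model_config)


spec = MiniSpec({'max_batch_size': 4, 'instance_group': [{'count': 1}]})

model_config_dict = spec.get_default_config()
model_config_dict['max_batch_size'] = 64          # overlay a param_combo value

assert spec.get_default_config()['max_batch_size'] == 4  # default untouched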

model_analyzer/config/generate/brute_run_config_generator.py

Lines changed: 4 additions & 4 deletions
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional
+from typing import List
 
 from .config_generator_interface import ConfigGeneratorInterface
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
 from model_analyzer.config.generate.model_run_config_generator import ModelRunConfigGenerator
 from model_analyzer.config.generate.model_variant_name_manager import ModelVariantNameManager
-from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 
 
 class BruteRunConfigGenerator(ConfigGeneratorInterface):
@@ -30,9 +30,9 @@ class BruteRunConfigGenerator(ConfigGeneratorInterface):
     def __init__(self,
                  config,
                  gpus,
-                 models,
+                 models: List[ModelProfileSpec],
                  client,
-                 model_variant_name_manager,
+                 model_variant_name_manager: ModelVariantNameManager,
                  skip_default_config: bool = False):
         """
         Parameters
model_analyzer/config/generate/model_profile_spec.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from copy import deepcopy
+from typing import List
+from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.input.objects.config_model_profile_spec import ConfigModelProfileSpec
+from model_analyzer.triton.client.client import TritonClient
+from model_analyzer.triton.model.model_config import ModelConfig
+from model_analyzer.device.gpu_device import GPUDevice
+
+
+class ModelProfileSpec(ConfigModelProfileSpec):
+    """
+    The profile configuration and default model config for a single model to be profiled
+    """
+
+    def __init__(self, spec: ConfigModelProfileSpec,
+                 config: ConfigCommandProfile, client: TritonClient,
+                 gpus: List[GPUDevice]):
+        self.__dict__ = deepcopy(spec.__dict__)
+
+        self._default_model_config = ModelConfig.create_model_config_dict(
+            config, client, gpus, config.model_repository, spec.model_name())
+
+    def get_default_config(self) -> dict:
+        """ Returns the default configuration for this model """
+        return deepcopy(self._default_model_config)
+
+    def supports_batching(self) -> bool:
+        """ Returns True if this model supports batching. Else False """
+        if "max_batch_size" not in self._default_model_config or self._default_model_config[
+                'max_batch_size'] == 0:
+            return False
+        return True
+
+    def supports_dynamic_batching(self) -> bool:
+        """ Returns True if this model supports dynamic batching. Else False """
+        supports_dynamic_batching = self.supports_batching()
+
+        if "sequence_batching" in self._default_model_config:
+            supports_dynamic_batching = False
+        return supports_dynamic_batching
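
In a Triton model config, a max_batch_size of 0 (or an absent field) means the model does not support batching, and sequence (stateful) models use the sequence batcher instead of the dynamic batcher. A small self-check restating the two predicates above against hypothetical default-config dicts (a real ModelProfileSpec derives its dict via ModelConfig.create_model_config_dict from the model repository):

# (config dict, expected supports_batching, expected supports_dynamic_batching)
cases = [
    ({"max_batch_size": 8}, True, True),    # batchable, dynamic batching OK
    ({"max_batch_size": 0}, False, False),  # batching explicitly disabled
    ({}, False, False),                     # no max_batch_size field at all
    ({"max_batch_size": 8, "sequence_batching": {}}, True, False),  # sequence model
]

for cfg, want_batching, want_dynamic in cases:
    got_batching = "max_batch_size" in cfg and cfg["max_batch_size"] != 0
    got_dynamic = got_batching and "sequence_batching" not in cfg
    assert (got_batching, got_dynamic) == (want_batching, want_dynamic)
print("all cases match")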

model_analyzer/config/generate/perf_analyzer_config_generator.py

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Union
+from typing import List, Union, Optional
 
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 
@@ -121,7 +121,7 @@ def set_last_results(self, measurements: List[Union[RunConfigMeasurement,
         # Remove 'NONE' cases, and find single max measurement from the list
         valid_measurements = [m for m in measurements if m]
 
-        measurement: List[Union[RunConfigMeasurement, None]] = [None]
+        measurement: List[Optional[RunConfigMeasurement]] = [None]
         if valid_measurements:
             measurement = [max(valid_measurements)]
 
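
For reference, this annotation change is purely cosmetic: typing.Optional[X] is defined as Union[X, None]. A quick demonstration:

from typing import List, Optional, Union

# Optional[X] is shorthand for Union[X, None]; the two annotations compare equal.
assert Optional[int] == Union[int, None]

measurement: List[Optional[int]] = [None]  # identical runtime meaning as before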

model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py

Lines changed: 9 additions & 4 deletions
@@ -24,7 +24,7 @@
 from model_analyzer.triton.client.client import TritonClient
 from model_analyzer.device.gpu_device import GPUDevice
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
-from model_analyzer.config.input.objects.config_model_profile_spec import ConfigModelProfileSpec
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
 from model_analyzer.result.result_manager import ResultManager
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 from model_analyzer.result.run_config_result import RunConfigResult
@@ -47,7 +47,7 @@ class QuickPlusConcurrencySweepRunConfigGenerator(ConfigGeneratorInterface):
 
     def __init__(self, search_config: SearchConfig,
                  config: ConfigCommandProfile, gpus: List[GPUDevice],
-                 models: List[ConfigModelProfileSpec], client: TritonClient,
+                 models: List[ModelProfileSpec], client: TritonClient,
                  result_manager: ResultManager,
                  model_variant_name_manager: ModelVariantNameManager):
         """
@@ -171,8 +171,13 @@ def _set_parameters(self, result: RunConfigResult,
     # We will need to create a yaml config to set each models
     # batch size/instances seperately
     def _find_batch_size(self, result: RunConfigResult) -> int:
-        return result.run_config().model_run_configs()[0].model_config(
-        ).get_config()['max_batch_size']
+        mc = result.run_config().model_run_configs()[0].model_config(
+        ).get_config()
+
+        batch_size = 1
+        if 'max_batch_size' in mc:
+            batch_size = mc['max_batch_size']
+        return batch_size
 
     def _find_instance_count(self, result: RunConfigResult) -> int:
         return result.run_config().model_run_configs()[0].model_config(
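
The new _find_batch_size fallback defaults to 1 when the model config has no max_batch_size field, so quick search no longer raises a KeyError on non-batching models. As a sketch (not what the commit ships), dict.get expresses the same lookup compactly:

# 'mc' is a hypothetical model-config dict for a non-batching model.
mc = {"instance_group": [{"count": 2, "kind": "KIND_GPU"}]}  # no max_batch_size

batch_size = mc.get("max_batch_size", 1)  # equivalent to the fallback above
assert batch_size == 1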
