triton-inference-server · ziqifan617 · Apr 1, 2025
diff --git a/qa/L0_response_cache/ensemble_cache_test.py b/qa/L0_response_cache/ensemble_cache_test.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -48,8 +48,8 @@ def setUp(self):
         self.triton_client = grpcclient.InferenceServerClient(
             "localhost:8001", verbose=True
         )
-        self.ensemble_model = "simple_graphdef_float32_float32_float32"
-        self.composing_model = "graphdef_float32_float32_float32"
+        self.ensemble_model = "simple_onnx_float32_float32_float32"
+        self.composing_model = "onnx_float32_float32_float32"
         self.model_directory = os.path.join(os.getcwd(), "models", "ensemble_models")
         self.ensemble_config_file = os.path.join(
             self.model_directory, self.ensemble_model, "config.pbtxt"
@@ -125,7 +125,7 @@ def _run_inference_and_validate(self, model):
         Helper function that takes model as a parameter to verify the corresponding model's stats
         The passed model is composing model for test case `test_ensemble_composing_model_cache_enabled`
         For other testcases, the top-level ensemble model stats are verified.
-            * loads the simple_graphdef_float32_float32_float32 and graphdef_float32_float32_float32
+            * loads the simple_onnx_float32_float32_float32 and onnx_float32_float32_float32
               and verifies if they are loaded properly.
             * Checks the initial statistics of the model passed in the parameter
               Expected - baseline statistics to be all empty metrics since

diff --git a/qa/L0_response_cache/test.sh b/qa/L0_response_cache/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -64,8 +64,8 @@ ENSEMBLE_CACHE_COMPOSING_DECOUPLED="${MODEL_DIR}/ensemble_cache_composing_decoup
 rm -fr ${ENSEMBLE_MODEL_DIR} && mkdir ${ENSEMBLE_MODEL_DIR}
 rm -fr ${ENSEMBLE_CACHE_DECOUPLED} && mkdir ${ENSEMBLE_CACHE_DECOUPLED}
 rm -fr ${ENSEMBLE_CACHE_COMPOSING_DECOUPLED} && mkdir ${ENSEMBLE_CACHE_COMPOSING_DECOUPLED}
-ENSEMBLE_MODEL="simple_graphdef_float32_float32_float32"
-COMPOSING_MODEL="graphdef_float32_float32_float32"
+ENSEMBLE_MODEL="simple_onnx_float32_float32_float32"
+COMPOSING_MODEL="onnx_float32_float32_float32"
 
 cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${ENSEMBLE_MODEL_DIR}/${ENSEMBLE_MODEL}"
 cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${ENSEMBLE_MODEL_DIR}/${COMPOSING_MODEL}"